diff --git a/CMakePresets.json b/CMakePresets.json index 1035722b..56db1326 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -16,6 +16,7 @@ "BUILD_SHARED_LIBS": "OFF", "FETCHCONTENT_QUIET": "OFF", + "FMT_MODULE": "OFF", "VULKAN_HEADERS_ENABLE_INSTALL": "ON", "ENTT_INCLUDE_HEADERS": "ON", diff --git a/Obelisk/EntryPoint.cpp b/Obelisk/EntryPoint.cpp index afb7e80a..86c84a1b 100644 --- a/Obelisk/EntryPoint.cpp +++ b/Obelisk/EntryPoint.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #ifdef ZENGINE_PLATFORM @@ -22,6 +23,8 @@ int applicationEntryPoint(int argc, char* argv[]) LoggerConfiguration logger_cfg = {}; Logger::Initialize(arena, logger_cfg); + Helpers::ThreadPoolHelper::Initialize(); + GameApplicationPtr app = nullptr; CLI::App cli{"ObeliskCLI"}; diff --git a/Resources/Shaders/fragment_common.glsl b/Resources/Shaders/fragment_common.glsl index 5bb2c05a..7f125f25 100644 --- a/Resources/Shaders/fragment_common.glsl +++ b/Resources/Shaders/fragment_common.glsl @@ -61,7 +61,8 @@ layout(std140, set = 0, binding = 5) readonly buffer MatSB } MaterialDataBuffer; -layout(set = 1, binding = 0) uniform sampler2D TextureArray[]; +layout(set = 1, binding = 0) uniform texture2D TextureArray[]; +layout(set = 1, binding = 1) uniform sampler LinearWrapSampler; MaterialData FetchMaterial(uint dataIndex) { diff --git a/Resources/Shaders/g_buffer.frag b/Resources/Shaders/g_buffer.frag index 3ab9b1ee..0fad4f35 100644 --- a/Resources/Shaders/g_buffer.frag +++ b/Resources/Shaders/g_buffer.frag @@ -24,18 +24,18 @@ void main() if (material.AlbedoMap < INVALID_MAP_HANDLE) { uint texId = uint(material.AlbedoMap); - OutAlbedo = texture(TextureArray[nonuniformEXT(texId)], TexCoord); + OutAlbedo = texture(sampler2D(TextureArray[nonuniformEXT(texId)], LinearWrapSampler), TexCoord); } if (material.SpecularMap < INVALID_MAP_HANDLE) { uint texId = uint(material.SpecularMap); - OutSpecular = texture(TextureArray[nonuniformEXT(texId)], TexCoord); + OutSpecular = 
texture(sampler2D(TextureArray[nonuniformEXT(texId)], LinearWrapSampler), TexCoord); } if (material.NormalMap < INVALID_MAP_HANDLE) { uint texId = uint(material.NormalMap); - OutNormal = texture(TextureArray[nonuniformEXT(texId)], TexCoord).rgb; + OutNormal = texture(sampler2D(TextureArray[nonuniformEXT(texId)], LinearWrapSampler), TexCoord).rgb; } -} \ No newline at end of file +} diff --git a/Resources/Shaders/imgui.frag b/Resources/Shaders/imgui.frag index 6ce0502e..dca5fd7a 100644 --- a/Resources/Shaders/imgui.frag +++ b/Resources/Shaders/imgui.frag @@ -1,10 +1,10 @@ #version 460 core #extension GL_EXT_nonuniform_qualifier : require -#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable layout(location = 0) out vec4 fColor; -layout(set = 0, binding = 0) uniform sampler2D _unused; -layout(set = 1, binding = 0) uniform sampler2D TextureArray[]; +layout(set = 0, binding = 0) uniform sampler _unused; +layout(set = 1, binding = 0) uniform texture2D TextureArray[]; +layout(set = 1, binding = 1) uniform sampler LinearWrapSampler; layout(location = 0) in struct { @@ -14,10 +14,7 @@ layout(location = 0) in struct void main() { - uint texId = uint(In.TexData.z); - if (texId < 0xFFFFFFFFu) - { - vec4 texVal = texture(TextureArray[nonuniformEXT(texId)], In.TexData.xy); - fColor = In.Color * texVal; - } -} \ No newline at end of file + uint texId = uint(floor(In.TexData.z + 0.5)); + vec4 texVal = texture(sampler2D(TextureArray[nonuniformEXT(texId)], LinearWrapSampler), In.TexData.xy); + fColor = In.Color * texVal; +} diff --git a/Resources/Shaders/imgui.vert b/Resources/Shaders/imgui.vert index d5fa11a8..b9ad2957 100644 --- a/Resources/Shaders/imgui.vert +++ b/Resources/Shaders/imgui.vert @@ -8,6 +8,7 @@ layout(push_constant) uniform uPushConstant vec2 uScale; vec2 uTranslate; uint index; + uint _padding; } pc; diff --git a/Resources/Shaders/skybox.frag b/Resources/Shaders/skybox.frag index 50a1b157..05386bcb 100644 --- a/Resources/Shaders/skybox.frag +++ 
b/Resources/Shaders/skybox.frag @@ -2,9 +2,10 @@ layout(location = 0) in vec3 dir; layout(location = 0) out vec4 outColor; -layout(set = 0, binding = 1) uniform samplerCube EnvMap; +layout(set = 0, binding = 1) uniform textureCube EnvMap; +layout(set = 1, binding = 1) uniform sampler LinearWrapSampler; void main() { - outColor = texture(EnvMap, dir); + outColor = texture(samplerCube(EnvMap, LinearWrapSampler), dir); } \ No newline at end of file diff --git a/ZEngine/ZEngine/Applications/AppRenderPipeline.cpp b/ZEngine/ZEngine/Applications/AppRenderPipeline.cpp index b9658e65..22c40100 100644 --- a/ZEngine/ZEngine/Applications/AppRenderPipeline.cpp +++ b/ZEngine/ZEngine/Applications/AppRenderPipeline.cpp @@ -1,5 +1,6 @@ #include #include +#include using namespace ZEngine::Core::Containers; @@ -7,13 +8,23 @@ namespace ZEngine::Applications { void AppRenderPipeline::Initialize(Hardwares::VulkanDevicePtr device) { - Device = device; + Device = device; + RenderWorkerThreadCount = Device->CommandBufferMgr->TotalThreadCount - 1u; + UICommandBufferIndex = RenderMainThreadIndex + 1u; + Device->Arena->CreateSubArena(ZMega(30), &LocalArena); + SceneRenderer = ZPushStructCtor(Device->Arena, Rendering::Renderers::GraphicRenderer); ImguiRenderer = ZPushStructCtor(Device->Arena, Rendering::Renderers::ImGUIRenderer); - Device->Arena->CreateSubArena(ZMega(30), &LocalArena); SceneRenderer->Initialize(Device); ImguiRenderer->Initialize(Device); + + for (size_t i = 0; i < MaxMailBoxBufferCount; ++i) + { + RenderPayloads[i].UIOverlay.IndexedCmds.resize(100); + RenderPayloads[i].UIOverlay.ScissorCmds.resize(100); + RenderPayloads[i].UIOverlay.TextureIds.resize(100); + } } void AppRenderPipeline::Shutdown() @@ -33,19 +44,45 @@ namespace ZEngine::Applications void AppRenderPipeline::BeginFrame() { - Device->NewFrame(); - CurrentCmdBuf = Device->GetCommandBuffer(); + auto swapchain = Device->SwapchainPtr; + + swapchain->AcquireNextImage(CurrentMailBoxBufferHead); + + for (uint8_t 
thread_idx = 0; thread_idx < Device->CommandBufferMgr->TotalThreadCount; ++thread_idx) + { + Device->CommandBufferMgr->ResetPool(swapchain->CurrentFrame->Index, thread_idx); + Device->AsyncResLoader->ResetCommandBuffers(swapchain->CurrentFrame->Index, thread_idx); + } + + Device->AsyncResLoader->CompleteDeferrals(); + + // uint8_t render_worker_thread_idx = RenderThreadIndex + 1; + // for (uint8_t worker_thread_idx = 0; worker_thread_idx < RenderWorkerThreadCount; ++worker_thread_idx) + // { + // auto thread_idx = render_worker_thread_idx + worker_thread_idx; + // } + CurrentCmdBuf = Device->CommandBufferMgr->GetCommandBuffer(Rendering::QueueType::GRAPHIC_QUEUE, swapchain->CurrentFrame->Index, RenderMainThreadIndex, 0, false); + vkResetCommandBuffer(CurrentCmdBuf->GetHandle(), 0); + CurrentCmdBuf->ResetState(); + CurrentCmdBuf->Begin(); } void AppRenderPipeline::EndFrame() { - Device->EnqueueCommandBuffer(CurrentCmdBuf); - Device->Present(); + Device->AsyncResLoader->SubmitAsyncJobs(); + Device->CommandBufferMgr->EnqueueBuffer(CurrentCmdBuf); + Device->CommandBufferMgr->EndEnqueuedBuffers(); + + Device->SwapchainPtr->Present(); } void AppRenderPipeline::RenderScene(Rendering::Cameras::CameraPtr camera, Rendering::Scenes::RenderScenePtr scene) { - if (scene->TransformBufferDirty[Device->CurrentFrameIndex].load(std::memory_order_acquire) || scene->MeshAllocationDirty[Device->CurrentFrameIndex].load(std::memory_order_acquire)) + auto swpachain = Device->SwapchainPtr; + auto frame_index = swpachain->CurrentFrame->Index; + auto thread_index = RenderMainThreadIndex; + + if (scene->TransformBufferDirty[Device->SwapchainPtr->CurrentFrame->Index].load(std::memory_order_acquire) || scene->MeshAllocationDirty[Device->SwapchainPtr->CurrentFrame->Index].load(std::memory_order_acquire)) { auto gpu_scene_data = SceneRenderer->RenderSceneData; @@ -56,21 +93,21 @@ namespace ZEngine::Applications auto indirect_buffer_set = 
Device->IndirectBufferSetManager.Access(gpu_scene_data->IndirectBufferHandle); - auto vtx_buffer = vtx_buffer_set->At(Device->CurrentFrameIndex); - auto idx_buffer = idx_buffer_set->At(Device->CurrentFrameIndex); - auto transform_buffer = transform_buffer_set->At(Device->CurrentFrameIndex); - auto rd_buffer = rd_buffer_set->At(Device->CurrentFrameIndex); - auto indirect_buffer = indirect_buffer_set->At(Device->CurrentFrameIndex); + auto vtx_buffer = vtx_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index); + auto idx_buffer = idx_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index); + auto transform_buffer = transform_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index); + auto rd_buffer = rd_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index); + auto indirect_buffer = indirect_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index); auto& suballocs = scene->NodeSubMeshesAllocations; - if (scene->TransformBufferDirty[Device->CurrentFrameIndex].exchange(false, std::memory_order_acquire)) + if (scene->TransformBufferDirty[Device->SwapchainPtr->CurrentFrame->Index].exchange(false, std::memory_order_acquire)) { auto transform_data_view = ArrayView{scene->GlobalTransforms}; - transform_buffer->Write(transform_data_view); + transform_buffer->Write(frame_index, thread_index, transform_data_view); } - if (scene->MeshAllocationDirty[Device->CurrentFrameIndex].exchange(false, std::memory_order_acquire)) + if (scene->MeshAllocationDirty[Device->SwapchainPtr->CurrentFrame->Index].exchange(false, std::memory_order_acquire)) { auto scratch = ZGetScratch(&LocalArena); @@ -100,12 +137,12 @@ namespace ZEngine::Applications auto sub_mesh_alloc_view = ArrayView{SubMeshAllocations}; auto indirect_commands_view = ArrayView{DrawIndirectCommands}; - vtx_buffer->Write(vertex_data_view); - idx_buffer->Write(index_data_view); + vtx_buffer->Write(frame_index, thread_index, vertex_data_view); + idx_buffer->Write(frame_index, thread_index, index_data_view); - 
rd_buffer->Write(sub_mesh_alloc_view); + rd_buffer->Write(frame_index, thread_index, sub_mesh_alloc_view); - indirect_buffer->Write(indirect_commands_view); + indirect_buffer->Write(frame_index, thread_index, indirect_commands_view); ZReleaseScratch(scratch); } @@ -113,7 +150,7 @@ namespace ZEngine::Applications // Todo (Kernel) : When we'll start considering multithreaded support // we might want to renderer->EnqueueAsync({command_buffer, {camera, frame_data} }) - SceneRenderer->DrawScene(CurrentCmdBuf, camera); + SceneRenderer->DrawScene(frame_index, thread_index, CurrentCmdBuf, camera); } void AppRenderPipeline::BeginOverlayFrame() @@ -121,8 +158,77 @@ namespace ZEngine::Applications ImguiRenderer->NewFrame(); } + void AppRenderPipeline::FillOverlayPayload(Rendering::Renderers::RenderOverlayPayload& payload) + { + ImguiRenderer->PreparePayload(payload); + } + + void AppRenderPipeline::RenderOverlay(const Rendering::Renderers::RenderOverlayPayload& payload) + { + if (payload.VertexCount == 0 && payload.IndexCount == 0) + { + return; + } + + auto swpachain = Device->SwapchainPtr; + auto frame_index = swpachain->CurrentFrame->Index; + auto thread_index = RenderMainThreadIndex; + + auto current_framebuffer = Device->SwapchainPtr->SwapchainFramebuffers[Device->SwapchainPtr->CurrentFrame->ImageIndex]; + + CurrentCmdBuf->BeginRenderPass(ImguiRenderer->UIPass, current_framebuffer, true); + { + auto vtx_data_view = ArrayView{payload.VertexData.data(), payload.VertexData.size()}; + auto idx_data_view = ArrayView{payload.IndexData.data(), payload.IndexData.size()}; + + auto vertex_buffer_set = Device->VertexBufferSetManager.Access(payload.VBHandle); + auto index_buffer_set = Device->IndexBufferSetManager.Access(payload.IdxBHandle); + + auto vertex_buffer = vertex_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index); + auto index_buffer = index_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index); + + vertex_buffer->Write(frame_index, thread_index, vtx_data_view); 
+ index_buffer->Write(frame_index, thread_index, idx_data_view); + + auto ui_second_cb = Device->CommandBufferMgr->GetCommandBuffer(Rendering::QueueType::GRAPHIC_QUEUE, Device->SwapchainPtr->CurrentFrame->Index, RenderMainThreadIndex, UICommandBufferIndex, false); + ui_second_cb->ResetState(); + ui_second_cb->BeginSecondary(ImguiRenderer->UIPass, current_framebuffer); + ui_second_cb->SetViewport(ImguiRenderer->UIPass->GetRenderAreaWidth(), ImguiRenderer->UIPass->GetRenderAreaHeight()); + + ui_second_cb->BindPipeline(Rendering::Specifications::PipelineBindPoint::GRAPHIC, ImguiRenderer->UIPass->Pipeline); + + ui_second_cb->BindVertexBuffer(*vertex_buffer); + ui_second_cb->BindIndexBuffer(*index_buffer, payload.IsIndexBufferUint16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32); + + Rendering::Renderers::PushConstantData pc_data = {}; + pc_data.Scale[0] = payload.Pc[0]; + pc_data.Scale[1] = payload.Pc[1]; + + pc_data.Translate[0] = payload.Pc[2]; + pc_data.Translate[1] = payload.Pc[3]; + + for (uint32_t i = 0; i < payload.DrawDataIndex; ++i) + { + const auto& scissor_cmd = payload.ScissorCmds[i]; + const auto& indexed_cmd = payload.IndexedCmds[i]; + + ui_second_cb->SetScissor(scissor_cmd.w, scissor_cmd.h, scissor_cmd.x, scissor_cmd.y); + pc_data.TextureId = payload.TextureIds[i]; + ui_second_cb->PushConstants(VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(Rendering::Renderers::PushConstantData), &pc_data); + ui_second_cb->BindDescriptorSets(Device->SwapchainPtr->CurrentFrame->Index); + ui_second_cb->DrawIndexed(indexed_cmd.IdxCount, indexed_cmd.InstanceCount, indexed_cmd.FirstIndex, indexed_cmd.VertexOffset, indexed_cmd.FirstInstance); + } + + ui_second_cb->End(); + + CurrentCmdBuf->ExecuteSecondaryCommandBuffers(ArrayView{ui_second_cb, 1}); + } + + CurrentCmdBuf->EndRenderPass(); + } + void AppRenderPipeline::EndOverlayFrame() { - ImguiRenderer->DrawFrame(CurrentCmdBuf); + ImguiRenderer->EndFrame(); } } // namespace ZEngine::Applications diff --git 
a/ZEngine/ZEngine/Applications/AppRenderPipeline.h b/ZEngine/ZEngine/Applications/AppRenderPipeline.h index 3b132860..fe8c7aee 100644 --- a/ZEngine/ZEngine/Applications/AppRenderPipeline.h +++ b/ZEngine/ZEngine/Applications/AppRenderPipeline.h @@ -5,14 +5,32 @@ namespace ZEngine::Applications { - struct AppRenderPipeline + struct RenderPayload { - Hardwares::VulkanDevicePtr Device = nullptr; - Rendering::Renderers::GraphicRendererPtr SceneRenderer = nullptr; - Rendering::Renderers::ImGUIRendererPtr ImguiRenderer = nullptr; - Hardwares::CommandBufferPtr CurrentCmdBuf = nullptr; + uint32_t RenderTargetW = 0; + uint32_t RenderTargetH = 0; + PaddedAtomic RenderUIOverlay = {.value = false}; + PaddedAtomic ResizeRenderTarget = {.value = false}; + Rendering::Cameras::CameraPtr Camera = nullptr; + Rendering::Scenes::RenderScenePtr Scene = nullptr; + Rendering::Renderers::RenderOverlayPayload UIOverlay = {}; + }; - ZEngine::Core::Memory::ArenaAllocator LocalArena = {}; + struct AppRenderPipeline + { + const uint8_t MaxMailBoxBufferCount = 3; + const uint8_t RenderMainThreadIndex = 0; + uint8_t RenderWorkerThreadCount = 0; + uint8_t UICommandBufferIndex = 0xff; + uint32_t CurrentMailBoxBufferHead = 0; + PaddedAtomic MailBoxBufferHead = {.value = 0}; + PaddedAtomic MailBoxBufferTail = {.value = 0}; + RenderPayload RenderPayloads[3] = {}; + ZEngine::Core::Memory::ArenaAllocator LocalArena = {}; + Hardwares::VulkanDevicePtr Device = nullptr; + Rendering::Renderers::GraphicRendererPtr SceneRenderer = nullptr; + Rendering::Renderers::ImGUIRendererPtr ImguiRenderer = nullptr; + Hardwares::CommandBufferPtr CurrentCmdBuf = nullptr; void Initialize(Hardwares::VulkanDevicePtr device); void Shutdown(); @@ -26,6 +44,8 @@ namespace ZEngine::Applications void BeginOverlayFrame(); void EndOverlayFrame(); + void FillOverlayPayload(Rendering::Renderers::RenderOverlayPayload& payload); + void RenderOverlay(const Rendering::Renderers::RenderOverlayPayload& payload); }; 
ZDEFINE_PTR(AppRenderPipeline); diff --git a/ZEngine/ZEngine/Applications/GameApplication.cpp b/ZEngine/ZEngine/Applications/GameApplication.cpp index 05de1bf7..aa87ed6f 100644 --- a/ZEngine/ZEngine/Applications/GameApplication.cpp +++ b/ZEngine/ZEngine/Applications/GameApplication.cpp @@ -47,28 +47,18 @@ namespace ZEngine::Applications OnEvent(e); } - void GameApplication::Render() + void GameApplication::PrepareScene(RenderPayload& payload) { RenderTargetResizeRequest request = {}; if (State->RenderTargetResizeRequests.Pop(request)) { - RenderPipeline->ResizeRenderTarget(request.Width, request.Height); + payload.ResizeRenderTarget.value.store(true, std::memory_order_release); + payload.RenderTargetW = request.Width; + payload.RenderTargetH = request.Height; } - RenderPipeline->BeginFrame(); - - OnPreRender(); - RenderPipeline->RenderScene(CameraController->GetCamera(), CurrentScene); - OnPostRender(); - - if (EnableRenderOverlay) - { - RenderPipeline->BeginOverlayFrame(); - OnRenderUI(); - RenderPipeline->EndOverlayFrame(); - } - - RenderPipeline->EndFrame(); + payload.Scene = CurrentScene; + payload.Camera = CameraController->GetCamera(); } void GameApplication::Shutdown() diff --git a/ZEngine/ZEngine/Applications/GameApplication.h b/ZEngine/ZEngine/Applications/GameApplication.h index 0f6e5c2e..265a5ab7 100644 --- a/ZEngine/ZEngine/Applications/GameApplication.h +++ b/ZEngine/ZEngine/Applications/GameApplication.h @@ -42,7 +42,7 @@ namespace ZEngine::Applications void Update(Core::TimeStep dt); void ProcessEvent(Core::CoreEvent&); void Run(); - void Render(); + void PrepareScene(RenderPayload&); void Shutdown(); virtual void OverrideWindowConfiguration() = 0; diff --git a/ZEngine/ZEngine/Engine.cpp b/ZEngine/ZEngine/Engine.cpp index ef854962..70091230 100644 --- a/ZEngine/ZEngine/Engine.cpp +++ b/ZEngine/ZEngine/Engine.cpp @@ -1,17 +1,27 @@ #include #include #include +#include #include #include #include +#include +#include + +#ifdef __APPLE__ +#include 
+#include +#endif + +using namespace std::chrono_literals; namespace ZEngine { - static bool s_request_terminate = false; - static std::shared_mutex g_mutex = {}; + static std::atomic_bool s_request_terminate = false; static EngineContextPtr g_engine_ctx = nullptr; static Applications::GameApplicationPtr g_app = nullptr; static Applications::AppRenderPipelinePtr g_appRenderPipeline = nullptr; + static std::thread g_render_thread = {}; void Engine::Initialize(ZEngine::Core::Memory::ArenaAllocator* arena, Windows::WindowConfigurationPtr window_cfg_ptr, Applications::GameApplicationPtr app) { @@ -23,9 +33,9 @@ namespace ZEngine window->Initialize(arena, *window_cfg_ptr); g_engine_ctx->Window = window; - g_appRenderPipeline = ZPushStruct(arena, Applications::AppRenderPipeline); + g_appRenderPipeline = ZPushStructCtor(arena, Applications::AppRenderPipeline); - g_engine_ctx->Device->Initialize(arena, window); + g_engine_ctx->Device->Initialize(arena, window, (Helpers::ThreadPoolHelper::Pool->MaxThreadCount / 2u) /*, k_mailbox_buffer_size */); g_appRenderPipeline->Initialize(g_engine_ctx->Device); Managers::AssetManager::Initialize(arena, g_engine_ctx->Device, app->WorkingSpacePath); @@ -39,21 +49,19 @@ namespace ZEngine void Engine::Deinitialize() { - std::unique_lock l(g_mutex); - if (g_engine_ctx->Window) { g_engine_ctx->Window->Deinitialize(); } + g_render_thread.join(); g_appRenderPipeline->Shutdown(); - g_engine_ctx->Device->Deinitialize(); } void Engine::Dispose() { - s_request_terminate = false; + s_request_terminate.store(false, std::memory_order_release); Managers::AssetManager::Shutdown(); g_engine_ctx->Device->Dispose(); @@ -62,23 +70,22 @@ namespace ZEngine bool Engine::OnEngineClosed(Event::EngineClosedEvent& event) { - s_request_terminate = true; + s_request_terminate.store(true, std::memory_order_release); return true; } - void Engine::Run() + void Engine::MainThreadRun() { - Managers::AssetManager::Run(); - - s_request_terminate = false; - while (auto 
window = g_engine_ctx->Window) + while (!s_request_terminate.load(std::memory_order_acquire)) { - if (s_request_terminate) + if (!g_engine_ctx || !g_engine_ctx->Window || !g_engine_ctx->Device) { break; } - float dt = window->GetDeltaTime(); + auto window = g_engine_ctx->Window; + + float dt = window->GetDeltaTime(); window->PollEvent(); @@ -87,16 +94,101 @@ namespace ZEngine continue; } - /*On Update*/ g_app->Update(dt); - /*On Render*/ - g_app->Render(); + auto pipeline = g_app->RenderPipeline; + + uint32_t head = pipeline->MailBoxBufferHead.value.load(std::memory_order_acquire); + uint32_t next = (head + 1) % pipeline->MaxMailBoxBufferCount; + uint32_t tail = pipeline->MailBoxBufferTail.value.load(std::memory_order_acquire); + + // Buffer full, drop frame (non-blocking) + if (next == tail) + { + continue; + } + + auto& r_payload = pipeline->RenderPayloads[head]; + r_payload.UIOverlay.DrawDataIndex = 0; + r_payload.RenderUIOverlay.value.store(false, std::memory_order_release); + + if (g_app->EnableRenderOverlay) + { + pipeline->BeginOverlayFrame(); + g_app->OnRenderUI(); + pipeline->EndOverlayFrame(); + + r_payload.RenderUIOverlay.value.store(true, std::memory_order_release); + pipeline->FillOverlayPayload(r_payload.UIOverlay); + } + + g_app->PrepareScene(r_payload); + + pipeline->MailBoxBufferHead.value.store(next, std::memory_order_release); } + } - if (s_request_terminate) + void Engine::RenderThreadRun() + { +#ifdef __APPLE__ + pthread_setname_np("RenderThread"); + thread_port_t thread_port = pthread_mach_thread_np(pthread_self()); + thread_time_constraint_policy_data_t policy; + policy.period = 50000; + policy.computation = 20000; + policy.constraint = 40000; + policy.preemptible = 1; + + kern_return_t kr = thread_policy_set(thread_port, THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &policy, THREAD_TIME_CONSTRAINT_POLICY_COUNT); +#endif + while (true) { - Deinitialize(); + if (s_request_terminate.load(std::memory_order_acquire)) + { + break; + } + + auto 
pipeline = g_app->RenderPipeline; + + uint32_t tail = pipeline->MailBoxBufferTail.value.load(std::memory_order_acquire); + uint32_t head = pipeline->MailBoxBufferHead.value.load(std::memory_order_acquire); + + // Buffer empty + if (tail == head) + { + std::this_thread::sleep_for(std::chrono::microseconds(50)); + continue; + } + + pipeline->CurrentMailBoxBufferHead = tail; + Applications::RenderPayload& r_payload = pipeline->RenderPayloads[tail]; + + if (r_payload.ResizeRenderTarget.value.load(std::memory_order_acquire)) + { + pipeline->ResizeRenderTarget(r_payload.RenderTargetW, r_payload.RenderTargetH); + r_payload.ResizeRenderTarget.value.store(false, std::memory_order_release); + } + + pipeline->BeginFrame(); + pipeline->RenderScene(r_payload.Camera, r_payload.Scene); + if (r_payload.RenderUIOverlay.value.load(std::memory_order_acquire)) + { + pipeline->RenderOverlay(r_payload.UIOverlay); + } + pipeline->EndFrame(); + + uint32_t next = (tail + 1) % pipeline->MaxMailBoxBufferCount; + + pipeline->MailBoxBufferTail.value.store(next, std::memory_order_release); } } + + void Engine::Run() + { + Managers::AssetManager::Run(); + g_render_thread = std::thread(Engine::RenderThreadRun); + MainThreadRun(); + + Deinitialize(); + } } // namespace ZEngine diff --git a/ZEngine/ZEngine/Engine.h b/ZEngine/ZEngine/Engine.h index 1ab740a7..3847a52c 100644 --- a/ZEngine/ZEngine/Engine.h +++ b/ZEngine/ZEngine/Engine.h @@ -22,6 +22,9 @@ namespace ZEngine static void Dispose(); static bool OnEngineClosed(Event::EngineClosedEvent&); + static void MainThreadRun(); + static void RenderThreadRun(); + private: Engine() = delete; Engine(const Engine&) = delete; diff --git a/ZEngine/ZEngine/Hardwares/AsyncResourceLoader.cpp b/ZEngine/ZEngine/Hardwares/AsyncResourceLoader.cpp index 3a4f79c1..bdddef40 100644 --- a/ZEngine/ZEngine/Hardwares/AsyncResourceLoader.cpp +++ b/ZEngine/ZEngine/Hardwares/AsyncResourceLoader.cpp @@ -17,23 +17,453 @@ using namespace ZEngine::Helpers; using namespace 
ZEngine::Rendering::Specifications; using namespace ZEngine::Rendering; +using namespace ZEngine::Rendering::Primitives; +using namespace ZEngine::Helpers; namespace ZEngine::Hardwares { + void AsyncResourceLoader::Initialize(VulkanDevice* device) { - Device = device; - BufferManager = ZPushStructCtor(Device->Arena, CommandBufferManager); + Device = device; + TotalCommandBufferCount = Device->CommandBufferMgr->MaxBufferPerPool * Device->CommandBufferMgr->MaxBufferPerPool; + + Timelines.init(Device->Arena, Device->CommandBufferMgr->TotalPoolCount, Device->CommandBufferMgr->TotalPoolCount); + NextValues.init(Device->Arena, Device->CommandBufferMgr->TotalPoolCount, Device->CommandBufferMgr->TotalPoolCount); + RetireValues.init(Device->Arena, Device->CommandBufferMgr->TotalPoolCount, Device->CommandBufferMgr->TotalPoolCount); + + for (uint32_t i = 0; i < Device->CommandBufferMgr->TotalPoolCount; ++i) + { + Timelines[i] = ZPushStructCtorArgs(Device->Arena, Rendering::Primitives::Semaphore, Device, true); + RetireValues[i].init(Device->Arena, TotalCommandBufferCount, TotalCommandBufferCount); + NextValues[i].store(1, std::memory_order_release); + } + + if (Device->HasSeperateTransfertQueueFamily) + { + TransferTimelines.init(Device->Arena, Device->CommandBufferMgr->TotalPoolCount, Device->CommandBufferMgr->TotalPoolCount); + TransferNextValues.init(Device->Arena, Device->CommandBufferMgr->TotalPoolCount, Device->CommandBufferMgr->TotalPoolCount); + TransferRetireValues.init(Device->Arena, Device->CommandBufferMgr->TotalPoolCount, Device->CommandBufferMgr->TotalPoolCount); + + for (uint32_t i = 0; i < Device->CommandBufferMgr->TotalPoolCount; ++i) + { + TransferTimelines[i] = ZPushStructCtorArgs(Device->Arena, Rendering::Primitives::Semaphore, Device, true); + TransferRetireValues[i].init(Device->Arena, TotalCommandBufferCount, TotalCommandBufferCount); + TransferNextValues[i].store(1, std::memory_order_release); + } + } + } + + void AsyncResourceLoader::Submit(UploadType 
type, uint8_t frame_index, uint8_t thread_index, const UploadRequest& request) + { + switch (type) + { + case UploadType::TEXTURE_BUFFER: + case UploadType::TEXTURE_BUFFER_LARGE: + UploadTextureBuffer(frame_index, thread_index, request.TextureUpload.TexHandle, request.TextureUpload.Data); + break; + + case UploadType::BUFFER: + UploadBuffer(frame_index, thread_index, request.BufferUpload.Buffer, request.BufferUpload.Data, request.BufferUpload.Offset, request.BufferUpload.ByteSize); + break; + + case UploadType::STAGING_BUFFER: + UploadFromStagingBuffer(frame_index, thread_index, request.BufferUpload.Buffer, request.BufferUpload.Data, request.BufferUpload.Offset, request.BufferUpload.ByteSize); + break; + + case UploadType::BUFFER_CLEAR: + ClearBuffer(frame_index, thread_index, request.BufferUpload.Buffer, request.BufferUpload.Offset, request.BufferUpload.ByteSize, request.BufferUpload.ClearValue); + break; + + default: + break; + } + } + + void AsyncResourceLoader::CompleteDeferrals() + { + while (!DeferralUploadQueue.Empty()) + { + DeferralUpload deferral = {}; + DeferralUploadQueue.Pop(deferral); + if (deferral.Type == UploadType::TEXTURE_BUFFER_LARGE) + { + auto& buf = std::get>(deferral.Buffer); + Submit( + deferral.Type, + deferral.FrameIdx, + deferral.ThreadIdx, + UploadRequest{ + .TextureUpload = {.Data = buf.data(), .TexHandle = deferral.TexHandle} + }); + } + else if (deferral.Type == UploadType::TEXTURE_BUFFER) + { + auto& buf = std::get(deferral.Buffer); + Submit( + deferral.Type, + deferral.FrameIdx, + deferral.ThreadIdx, + UploadRequest{ + .TextureUpload = {.Data = buf, .TexHandle = deferral.TexHandle} + }); + } + } + } + + void AsyncResourceLoader::SubmitDeferral(DeferralUpload&& deferral) + { + DeferralUploadQueue.Emplace(std::forward(deferral)); + } + + void AsyncResourceLoader::UploadBuffer(uint8_t frame_index, uint8_t thread_index, BufferView* const buffer_view, const void* data, uint32_t offset, size_t byte_size) + { + if (!buffer_view || 
!(*buffer_view) || !data || byte_size == 0) + { + return; + } + + VkMemoryPropertyFlags mem_prop_flags; + vmaGetAllocationMemoryProperties(Device->VmaAllocatorValue, buffer_view->Allocation, &mem_prop_flags); + + if (mem_prop_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + { + ZENGINE_VALIDATE_ASSERT(vmaCopyMemoryToAllocation(Device->VmaAllocatorValue, data, buffer_view->Allocation, offset, byte_size) == VK_SUCCESS, "Failed to perform memory copy operation") + + // flushing the allocation so the GPU can see it + if (!(mem_prop_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) + { + vmaFlushAllocation(Device->VmaAllocatorValue, buffer_view->Allocation, offset, byte_size); + } + + VkAccessFlags dst_access_mask = VK_ACCESS_NONE; + VkPipelineStageFlags dst_pipeline_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; + switch (buffer_view->Type) + { + case BufferType::VERTEX: + dst_access_mask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + dst_pipeline_stage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + break; + + case BufferType::INDEX: + dst_access_mask = VK_ACCESS_INDEX_READ_BIT; + dst_pipeline_stage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + break; + + case BufferType::UNIFORM: + dst_access_mask = VK_ACCESS_UNIFORM_READ_BIT; + dst_pipeline_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + break; + + case BufferType::STORAGE: + dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + dst_pipeline_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + + case BufferType::INDIRECT: + dst_access_mask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + dst_pipeline_stage = VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + break; + default: + dst_pipeline_stage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + break; + } + + uint32_t i = 0; + uint32_t pool_index = (frame_index * Device->CommandBufferMgr->TotalThreadCount) + thread_index; + auto& retire_values = RetireValues[pool_index]; + + for (; i < retire_values.size(); ++i) + { + if 
(retire_values[i] == 0) + { + break; + } + } + + auto command_buffer = Device->CommandBufferMgr->GetInstantCommandBuffer(QueueType::GRAPHIC_QUEUE, frame_index, thread_index, i); + + VkBufferMemoryBarrier bufMemBarrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER}; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = dst_access_mask; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = buffer_view->Handle; + bufMemBarrier.offset = offset; + bufMemBarrier.size = byte_size; + + // It's important to insert a buffer memory barrier here to ensure writing to the buffer has finished. + vkCmdPipelineBarrier(command_buffer->GetHandle(), VK_PIPELINE_STAGE_HOST_BIT, dst_pipeline_stage, 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); + + command_buffer->End(); + + uint64_t signal_value = NextValues[pool_index].fetch_add(1, std::memory_order_acq_rel); + retire_values[i] = signal_value; + AsyncTimelineJobQueue.Enqueue({command_buffer, Timelines[pool_index], nullptr, dst_pipeline_stage, signal_value}); + } + else + { + UploadFromStagingBuffer(frame_index, thread_index, buffer_view, data, offset, byte_size); + } + } + + void AsyncResourceLoader::UploadFromStagingBuffer(uint8_t frame_index, uint8_t thread_index, BufferView* const destination, const void* data, uint32_t offset, size_t byte_size) + { + if (!destination || !(*destination) || !data || byte_size == 0) + { + return; + } + + uint32_t i = 0; + uint32_t pool_index = (frame_index * Device->CommandBufferMgr->TotalThreadCount) + thread_index; + auto& retire_values = RetireValues[pool_index]; + + for (; i < retire_values.size(); ++i) + { + if (retire_values[i] == 0) + { + break; + } + } + + auto command_buffer = Device->CommandBufferMgr->GetInstantCommandBuffer(QueueType::GRAPHIC_QUEUE, frame_index, thread_index, i); + + BufferView staging_buffer = Device->CreateBuffer(static_cast(byte_size), 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT); + + ZENGINE_VALIDATE_ASSERT(vmaCopyMemoryToAllocation(Device->VmaAllocatorValue, data, staging_buffer.Allocation, offset, byte_size) == VK_SUCCESS, "Failed to perform memory copy operation") + + auto dst_pipeline_stage = Device->CopyBuffer(command_buffer, staging_buffer, *destination, byte_size, 0u, offset); + + command_buffer->End(); + + uint64_t signal_value = NextValues[pool_index].fetch_add(1, std::memory_order_acq_rel); + retire_values[i] = signal_value; + AsyncTimelineJobQueue.Enqueue({command_buffer, Timelines[pool_index], nullptr, dst_pipeline_stage, signal_value}); + Device->EnqueueBufferForDeletion(staging_buffer); + } + + void AsyncResourceLoader::ClearBuffer(uint8_t frame_index, uint8_t thread_index, BufferView* const buffer_view, uint32_t offset, size_t byte_size, uint32_t clear_value) + { + if (!buffer_view || byte_size == 0) + { + return; + } + + VkMemoryPropertyFlags mem_prop_flags; + vmaGetAllocationMemoryProperties(Device->VmaAllocatorValue, buffer_view->Allocation, &mem_prop_flags); + + if (mem_prop_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + { + VmaAllocationInfo allocation_info = {}; + vmaGetAllocationInfo(Device->VmaAllocatorValue, buffer_view->Allocation, &allocation_info); + if (allocation_info.pMappedData) + { + auto mapped_buf = reinterpret_cast(allocation_info.pMappedData); + ZENGINE_VALIDATE_ASSERT(Helpers::secure_memset((mapped_buf + offset), clear_value, allocation_info.size, byte_size) == Helpers::MEMORY_OP_SUCCESS, "Failed to perform memory copy operation") + } + } + else + { + BufferView staging_buffer = Device->CreateBuffer(static_cast(byte_size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT); + + VmaAllocationInfo allocation_info = {}; + vmaGetAllocationInfo(Device->VmaAllocatorValue, staging_buffer.Allocation, 
&allocation_info); + + if (allocation_info.pMappedData) + { + + uint32_t i = 0; + uint32_t pool_index = (frame_index * Device->CommandBufferMgr->TotalThreadCount) + thread_index; + auto& retire_values = RetireValues[pool_index]; + + for (; i < retire_values.size(); ++i) + { + if (retire_values[i] == 0) + { + break; + } + } + + auto command_buffer = Device->CommandBufferMgr->GetInstantCommandBuffer(QueueType::GRAPHIC_QUEUE, frame_index, thread_index, i); + ZENGINE_VALIDATE_ASSERT(Helpers::secure_memset(allocation_info.pMappedData, clear_value, allocation_info.size, byte_size) == Helpers::MEMORY_OP_SUCCESS, "Failed to perform memory copy operation") + ZENGINE_VALIDATE_ASSERT(vmaFlushAllocation(Device->VmaAllocatorValue, staging_buffer.Allocation, 0, byte_size) == VK_SUCCESS, "Failed to flush allocation") + + auto dst_pipeline_stage = Device->CopyBuffer(command_buffer, staging_buffer, *buffer_view, byte_size, 0u, offset); + + command_buffer->End(); + uint64_t signal_value = NextValues[pool_index].fetch_add(1, std::memory_order_acq_rel); + retire_values[i] = signal_value; + AsyncTimelineJobQueue.Enqueue({command_buffer, Timelines[pool_index], nullptr, dst_pipeline_stage, signal_value}); + } + + /* Cleanup resource */ + Device->EnqueueBufferForDeletion(staging_buffer); + } + } + + void AsyncResourceLoader::UploadTextureBuffer(uint8_t frame_index, uint8_t thread_index, const Rendering::Textures::TextureHandle& handle, unsigned char* data) + { + if (!handle.Valid() || !data) + return; + + uint32_t pool_index = (frame_index * Device->CommandBufferMgr->TotalThreadCount) + thread_index; - BufferManager->Initialize(Device); - Helpers::ThreadPoolHelper::Submit([this] { Run(); }); + auto texture = Device->GlobalTextures.Access(handle); + auto img_buf = Device->Image2DBufferManager.Access(texture->BufferHandle); + auto img_buf_aspect = (texture->Specification.Format == Specifications::ImageFormat::DEPTH_STENCIL_FROM_DEVICE) ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + auto buffer_handle = img_buf->GetHandle(); + + if (Device->HasSeperateTransfertQueueFamily) + { + auto& transfer_retire_values = TransferRetireValues[pool_index]; + + uint32_t i = 0; + for (; i < transfer_retire_values.size(); ++i) + { + if (transfer_retire_values[i] == 0) + break; + } + + auto transfer_cmd = Device->CommandBufferMgr->GetInstantCommandBuffer(QueueType::TRANSFER_QUEUE, frame_index, thread_index, i); + + // 2. Transition to TRANSFER_DST_OPTIMAL + Specifications::ImageMemoryBarrierSpecification to_transfer = {}; + to_transfer.ImageHandle = buffer_handle; + to_transfer.OldLayout = img_buf->Layout; + to_transfer.NewLayout = Specifications::ImageLayout::TRANSFER_DST_OPTIMAL; + to_transfer.ImageAspectMask = VkImageAspectFlagBits(img_buf_aspect); + to_transfer.SourceAccessMask = VK_ACCESS_NONE; + to_transfer.DestinationAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + to_transfer.SourceStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + to_transfer.DestinationStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + to_transfer.LayerCount = texture->Specification.LayerCount; + to_transfer.SourceQueueFamily = Device->TransferFamilyIndex; + to_transfer.DestinationQueueFamily = Device->TransferFamilyIndex; + transfer_cmd->TransitionImageLayout(Primitives::ImageMemoryBarrier{to_transfer}); + + img_buf->Layout = to_transfer.NewLayout; + + // 3. Copy data to image + Device->WriteTextureData(transfer_cmd, handle, data); + + // Release barrier: transfer → graphics ownership + Specifications::ImageMemoryBarrierSpecification release = {}; + release.ImageHandle = buffer_handle; + release.OldLayout = Specifications::ImageLayout::TRANSFER_DST_OPTIMAL; + release.NewLayout = (img_buf_aspect & VK_IMAGE_ASPECT_DEPTH_BIT) ? 
Specifications::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL : Specifications::ImageLayout::SHADER_READ_ONLY_OPTIMAL; + release.ImageAspectMask = VkImageAspectFlagBits(img_buf_aspect); + release.SourceAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + release.DestinationAccessMask = VK_ACCESS_NONE; // Must be 0 for release + release.SourceStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + release.DestinationStageMask = (img_buf_aspect & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + release.LayerCount = texture->Specification.LayerCount; + release.SourceQueueFamily = Device->TransferFamilyIndex; + release.DestinationQueueFamily = Device->GraphicFamilyIndex; + + transfer_cmd->TransitionImageLayout(ImageMemoryBarrier{release}); + img_buf->Layout = release.NewLayout; + transfer_cmd->End(); + + uint64_t transfer_val = TransferNextValues[pool_index].fetch_add(1, std::memory_order_acq_rel); + TransferRetireValues[pool_index][i] = transfer_val; + + AsyncTimelineJobQueue.Enqueue({ + transfer_cmd, + TransferTimelines[pool_index], + nullptr, + VK_PIPELINE_STAGE_TRANSFER_BIT, + transfer_val, + UINT64_MAX // no wait value + }); + + uint32_t acquire_slot = 0; + auto& retire_values = RetireValues[pool_index]; + for (; acquire_slot < retire_values.size(); ++acquire_slot) + { + if (retire_values[acquire_slot] == 0) + { + break; + } + } + + auto acquire_cmd = Device->CommandBufferMgr->GetInstantCommandBuffer(QueueType::GRAPHIC_QUEUE, frame_index, thread_index, acquire_slot); + + // Acquire barrier: graphics takes ownership + ImageMemoryBarrierSpecification acquire_spec = release; // same image/layout params + acquire_spec.SourceAccessMask = VK_ACCESS_NONE; // Must be 0 for acquire + acquire_spec.DestinationAccessMask = VK_ACCESS_SHADER_READ_BIT; + acquire_spec.SourceQueueFamily = Device->TransferFamilyIndex; + acquire_spec.DestinationQueueFamily = Device->GraphicFamilyIndex; + + 
acquire_cmd->TransitionImageLayout(ImageMemoryBarrier{acquire_spec}); + acquire_cmd->End(); + + uint64_t graphics_val = NextValues[pool_index].fetch_add(1, std::memory_order_acq_rel); + retire_values[acquire_slot] = graphics_val; + + AsyncTimelineJobQueue.Enqueue({acquire_cmd, Timelines[pool_index], TransferTimelines[pool_index], (uint32_t) release.DestinationStageMask, graphics_val, transfer_val}); + + img_buf->Layout = release.NewLayout; + } + else + { + auto& retire_values = RetireValues[pool_index]; + + uint32_t i = 0; + for (; i < retire_values.size(); ++i) + { + if (retire_values[i] == 0) + break; + } + auto cmd = Device->CommandBufferMgr->GetInstantCommandBuffer(QueueType::GRAPHIC_QUEUE, frame_index, thread_index, i); + + ImageMemoryBarrierSpecification to_transfer = {}; + to_transfer.ImageHandle = buffer_handle; + to_transfer.OldLayout = img_buf->Layout; + to_transfer.NewLayout = ImageLayout::TRANSFER_DST_OPTIMAL; + to_transfer.ImageAspectMask = VkImageAspectFlagBits(img_buf_aspect); + to_transfer.SourceAccessMask = VK_ACCESS_NONE; + to_transfer.DestinationAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + to_transfer.SourceStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + to_transfer.DestinationStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + to_transfer.LayerCount = texture->Specification.LayerCount; + to_transfer.SourceQueueFamily = Device->GraphicFamilyIndex; + to_transfer.DestinationQueueFamily = Device->GraphicFamilyIndex; + + cmd->TransitionImageLayout(ImageMemoryBarrier{to_transfer}); + img_buf->Layout = to_transfer.NewLayout; + + Device->WriteTextureData(cmd, handle, data); + + // Single Queue: No ownership transfer needed. Just a normal barrier. + ImageMemoryBarrierSpecification to_final = {}; + to_final.ImageHandle = buffer_handle; + to_final.OldLayout = ImageLayout::TRANSFER_DST_OPTIMAL; + to_final.NewLayout = (img_buf_aspect & VK_IMAGE_ASPECT_DEPTH_BIT) ? 
ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL : ImageLayout::SHADER_READ_ONLY_OPTIMAL; + to_final.ImageAspectMask = VkImageAspectFlagBits(img_buf_aspect); + to_final.SourceAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + to_final.DestinationAccessMask = VK_ACCESS_SHADER_READ_BIT; + to_final.SourceStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + to_final.DestinationStageMask = (img_buf_aspect & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + to_final.LayerCount = texture->Specification.LayerCount; + to_final.SourceQueueFamily = Device->GraphicFamilyIndex; + to_final.DestinationQueueFamily = Device->GraphicFamilyIndex; + + cmd->TransitionImageLayout(ImageMemoryBarrier{to_final}); + cmd->End(); + + uint64_t signal_value = NextValues[pool_index].fetch_add(1, std::memory_order_acq_rel); + retire_values[i] = signal_value; + AsyncTimelineJobQueue.Enqueue({cmd, Timelines[pool_index], nullptr, (uint32_t) to_final.DestinationStageMask, signal_value, UINT64_MAX}); + img_buf->Layout = to_final.NewLayout; + } } - Textures::TextureHandle AsyncResourceLoader::LoadTextureFile(cstring filename) + Textures::TextureHandle AsyncResourceLoader::Submit(uint8_t frame_index, uint8_t thread_index, const UploadRequest& request) { - std::unique_lock l(m_mutex_2); + std::unique_lock l(m_mutex); - auto abs_filename = std::filesystem::absolute(filename).string(); + auto abs_filename = std::filesystem::absolute(request.TextureUpload.Filename).string(); int w, h, ch; if (!stbi_info(abs_filename.c_str(), &w, &h, &ch)) @@ -42,7 +472,7 @@ namespace ZEngine::Hardwares } const std::set known_cubmap_file_ext = {".hdr", ".exr"}; - auto file_ext = std::filesystem::path(filename).extension().string(); + auto file_ext = std::filesystem::path(request.TextureUpload.Filename).extension().string(); Specifications::TextureSpecification spec{.Width = (uint32_t) w, .Height = (uint32_t) h, .Format = Specifications::ImageFormat::R8G8B8A8_SRGB}; @@ -59,91 
+489,87 @@ namespace ZEngine::Hardwares } TextureFileRequest tex_file_req = {}; - tex_file_req.Filename = filename; + tex_file_req.Filename = request.TextureUpload.Filename; tex_file_req.TextureSpec = spec; tex_file_req.TextureSpec.BytePerPixel = Specifications::BytePerChannelMap[VALUE_FROM_SPEC_MAP(spec.Format)]; tex_file_req.Handle = Device->CreateTexture(tex_file_req.TextureSpec); + tex_file_req.FrameIdx = frame_index; + tex_file_req.ThreadIdx = thread_index; m_file_requests.Enqueue(tex_file_req); - m_cond.notify_one(); + + ThreadPoolHelper::Submit([this] { Run(); }); return tex_file_req.Handle; } - void AsyncResourceLoader::Run() + void AsyncResourceLoader::ClearAsyncJobs() { - while (true) - { - std::unique_lock l(m_mutex); - m_cond.wait(l, [this] { return !m_file_requests.Empty() || !m_update_texture_request.Empty() || !m_upload_requests.Empty() || m_cancellation_token.load() == true; }); + AsyncTimelineJobQueue.Clear(); + Device->AsyncGPUOperations.Clear(); + } - if (m_cancellation_token.load() == true) + void AsyncResourceLoader::SubmitAsyncJobs() + { + while (!AsyncTimelineJobQueue.Empty()) + { + TimelineJob job; + if (AsyncTimelineJobQueue.Pop(job)) { - break; + Device->QueueSubmit(job.Buffer, job.Timeline, job.WaitFlag, job.SignalValue, job.WaitValue, job.WaitTimeline); + Device->EnqueueAsyncGPUOperation({job.WaitFlag, job.SignalValue, job.Timeline}); } + } + } - // Processing update requests - if (m_update_texture_request.Size()) - { - UpdateTextureRequest tr; - if (m_update_texture_request.Pop(tr)) - { - auto texture = Device->GlobalTextures.Access(tr.Handle); - auto img_buf = Device->Image2DBufferManager.Access(texture->BufferHandle); - auto& spec = texture->Specification; - auto image_handle = img_buf->GetHandle(); - uint32_t image_aspect = (texture->Specification.Format == Specifications::ImageFormat::DEPTH_STENCIL_FROM_DEVICE) ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + void AsyncResourceLoader::ResetCommandBuffers(uint8_t frame_index, uint8_t thread_index) + { + uint32_t pool_index = (frame_index * Device->CommandBufferMgr->TotalThreadCount) + thread_index; - if (Device->HasSeperateTransfertQueueFamily) - { - Specifications::ImageMemoryBarrierSpecification barrier_spec_0 = {}; - barrier_spec_0.ImageHandle = image_handle; - barrier_spec_0.OldLayout = Specifications::ImageLayout::TRANSFER_DST_OPTIMAL; - barrier_spec_0.NewLayout = VkImageAspectFlagBits(image_aspect) == VK_IMAGE_ASPECT_DEPTH_BIT ? Specifications::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL : Specifications::ImageLayout::SHADER_READ_ONLY_OPTIMAL; - barrier_spec_0.ImageAspectMask = VkImageAspectFlagBits(image_aspect); - barrier_spec_0.SourceAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier_spec_0.DestinationAccessMask = VK_ACCESS_NONE; - barrier_spec_0.SourceStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - barrier_spec_0.DestinationStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - barrier_spec_0.LayerCount = spec.LayerCount; - barrier_spec_0.SourceQueueFamily = Device->TransferFamilyIndex; - barrier_spec_0.DestinationQueueFamily = Device->GraphicFamilyIndex; - Primitives::ImageMemoryBarrier barrier_0{barrier_spec_0}; - auto command_buffer_0 = BufferManager->GetInstantCommandBuffer(QueueType::TRANSFER_QUEUE, Device->CurrentFrameIndex); - { - command_buffer_0->TransitionImageLayout(barrier_0); - img_buf->Layout = barrier_spec_0.NewLayout; - } - BufferManager->EndInstantCommandBuffer(command_buffer_0, Device); - } + uint64_t graphics_value = 0; + auto& retire_values = RetireValues[pool_index]; + vkGetSemaphoreCounterValue(Device->LogicalDevice, Timelines[pool_index]->GetHandle(), &graphics_value); - VkAccessFlags access_flag = Device->HasSeperateTransfertQueueFamily ? VK_ACCESS_NONE : VK_ACCESS_TRANSFER_WRITE_BIT; - VkPipelineStageFlagBits src_stage = Device->HasSeperateTransfertQueueFamily ? 
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT : VK_PIPELINE_STAGE_TRANSFER_BIT; - - Specifications::ImageMemoryBarrierSpecification barrier_spec = {}; - barrier_spec.ImageHandle = image_handle; - barrier_spec.OldLayout = Specifications::ImageLayout::TRANSFER_DST_OPTIMAL; - barrier_spec.NewLayout = VkImageAspectFlagBits(image_aspect) == VK_IMAGE_ASPECT_DEPTH_BIT ? Specifications::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL : Specifications::ImageLayout::SHADER_READ_ONLY_OPTIMAL; - barrier_spec.ImageAspectMask = VkImageAspectFlagBits(image_aspect); - barrier_spec.SourceAccessMask = access_flag; - barrier_spec.DestinationAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier_spec.SourceStageMask = src_stage; - barrier_spec.DestinationStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - barrier_spec.LayerCount = spec.LayerCount; - barrier_spec.SourceQueueFamily = Device->TransferFamilyIndex; - barrier_spec.DestinationQueueFamily = Device->GraphicFamilyIndex; - Primitives::ImageMemoryBarrier barrier{barrier_spec}; - - auto command_buffer = BufferManager->GetInstantCommandBuffer(QueueType::GRAPHIC_QUEUE, Device->CurrentFrameIndex); - { - command_buffer->TransitionImageLayout(barrier); - img_buf->Layout = barrier_spec.NewLayout; - } - BufferManager->EndInstantCommandBuffer(command_buffer, Device); + for (int i = 0; i < retire_values.size(); ++i) + { + auto retire_val = retire_values[i]; + if (retire_val != 0 && graphics_value >= retire_val) + { + auto command_buffer = Device->CommandBufferMgr->GetInstantCommandBuffer(QueueType::GRAPHIC_QUEUE, frame_index, thread_index, i, false); + command_buffer->ResetState(); + vkResetCommandBuffer(command_buffer->GetHandle(), 0); + retire_values[i] = 0; + } + } - Device->TextureHandleToUpdates.Enqueue(tr.Handle); + if (Device->HasSeperateTransfertQueueFamily) + { + uint64_t transfer_value = 0; + auto& transfer_retire = TransferRetireValues[pool_index]; + vkGetSemaphoreCounterValue(Device->LogicalDevice, TransferTimelines[pool_index]->GetHandle(), 
&transfer_value); + + for (int i = 0; i < transfer_retire.size(); ++i) + { + auto transfer_retire_val = transfer_retire[i]; + if (transfer_retire_val != 0 && transfer_value >= transfer_retire_val) + { + auto transfer_cmd = Device->CommandBufferMgr->GetInstantCommandBuffer(QueueType::TRANSFER_QUEUE, frame_index, thread_index, i, false); + transfer_cmd->ResetState(); + vkResetCommandBuffer(transfer_cmd->GetHandle(), 0); + transfer_retire[i] = 0; } } + } + } + + void AsyncResourceLoader::Run() + { + while (m_cancellation_token.load(std::memory_order_acquire) == false) + { + if (m_file_requests.Empty() && m_upload_requests.Empty()) + { + break; + } // Processing upload requests if (m_upload_requests.Size()) @@ -151,175 +577,178 @@ namespace ZEngine::Hardwares TextureUploadRequest upload_request; if (m_upload_requests.Pop(upload_request)) { - auto texture = Device->GlobalTextures.Access(upload_request.Handle); - auto img_buf = Device->Image2DBufferManager.Access(texture->BufferHandle); - uint32_t image_aspect = (texture->Specification.Format == Specifications::ImageFormat::DEPTH_STENCIL_FROM_DEVICE) ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; - - auto command_buffer = BufferManager->GetInstantCommandBuffer(QueueType::TRANSFER_QUEUE, Device->CurrentFrameIndex); - { - auto image_handle = img_buf->GetHandle(); - auto& image_buffer = img_buf->GetBuffer(); - - Specifications::ImageMemoryBarrierSpecification barrier_spec_0 = {}; - barrier_spec_0.ImageHandle = image_handle; - barrier_spec_0.OldLayout = img_buf->Layout; - barrier_spec_0.NewLayout = Specifications::ImageLayout::TRANSFER_DST_OPTIMAL; - barrier_spec_0.ImageAspectMask = VkImageAspectFlagBits(image_aspect); - barrier_spec_0.SourceAccessMask = 0; - barrier_spec_0.DestinationAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier_spec_0.SourceStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - barrier_spec_0.DestinationStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - barrier_spec_0.LayerCount = upload_request.TextureSpec.LayerCount; - barrier_spec_0.SourceQueueFamily = Device->TransferFamilyIndex; - barrier_spec_0.DestinationQueueFamily = Device->TransferFamilyIndex; - - Primitives::ImageMemoryBarrier barrier_0{barrier_spec_0}; - command_buffer->TransitionImageLayout(barrier_0); - - img_buf->Layout = barrier_spec_0.NewLayout; - - Device->WriteTextureData(command_buffer, upload_request.Handle, upload_request.Buffer.data()); - } - BufferManager->EndInstantCommandBuffer(command_buffer, Device, VK_PIPELINE_STAGE_TRANSFER_BIT); - - UpdateTextureRequest tr = {.Handle = upload_request.Handle}; - - m_update_texture_request.Emplace(std::move(tr)); + DeferralUpload deferral = { + .Type = AsyncResourceLoader::UploadType::TEXTURE_BUFFER_LARGE, + .FrameIdx = upload_request.FrameIdx, + .ThreadIdx = upload_request.ThreadIdx, + .Buffer = std::move(upload_request.Buffer), + .TexHandle = upload_request.Handle, + }; + SubmitDeferral(std::move(deferral)); + Device->TextureHandleToUpdates.Enqueue(upload_request.Handle); } } // Processing file requests - TextureFileRequest file_request; - if (m_file_requests.Pop(file_request)) 
+ if (m_file_requests.Size()) { - TextureUploadRequest upload_req = {}; - - int width = 0, height = 0, channel = 0; - stbi_set_flip_vertically_on_load(1); - - if (file_request.TextureSpec.IsCubemap) + TextureFileRequest file_request; + if (m_file_requests.Pop(file_request)) { - const float* image_data = stbi_loadf(file_request.Filename.data(), &width, &height, &channel, 4); - if (!image_data) - { - ZENGINE_CORE_ERROR("Failed to load texture file : {0}", file_request.Filename.data()) - continue; - } + TextureUploadRequest upload_req = {}; - bool perform_convert_rgb_to_rgba = (channel == STBI_rgb); + int width = 0, height = 0, channel = 0; + stbi_set_flip_vertically_on_load(1); - std::vector output_buffer = {}; - if (perform_convert_rgb_to_rgba) + if (file_request.TextureSpec.IsCubemap) { - size_t total_pixel = width * height; - size_t buffer_size = total_pixel * 4; - output_buffer.resize(buffer_size); - stbir_resize_float(image_data, width, height, 0, output_buffer.data(), width, height, 0, 4); + const float* image_data = stbi_loadf(file_request.Filename.data(), &width, &height, &channel, 4); + if (!image_data) + { + ZENGINE_CORE_ERROR("Failed to load texture file : {0}", file_request.Filename.data()) + continue; + } - for (int i = 0; i < total_pixel; ++i) + bool perform_convert_rgb_to_rgba = (channel == STBI_rgb); + + std::vector output_buffer = {}; + if (perform_convert_rgb_to_rgba) { - int offset = i * 4; // RGBA format (4 channels) + size_t total_pixel = width * height; + size_t buffer_size = total_pixel * 4; + output_buffer.resize(buffer_size); + stbir_resize_float(image_data, width, height, 0, output_buffer.data(), width, height, 0, 4); - if (channel == 1) - { - output_buffer[offset + 3] = 255; - } - else if (channel == 2) - { - output_buffer[offset + 3] = image_data[i * 2 + 1]; - } - else if (channel == 3) + for (int i = 0; i < total_pixel; ++i) { - output_buffer[offset + 3] = 255; + int offset = i * 4; // RGBA format (4 channels) + + if (channel == 1) + { + 
output_buffer[offset + 3] = 255; + } + else if (channel == 2) + { + output_buffer[offset + 3] = image_data[i * 2 + 1]; + } + else if (channel == 3) + { + output_buffer[offset + 3] = 255; + } } } - } - else - { - size_t total_pixel = width * height; - size_t buffer_size = total_pixel * channel; - output_buffer.resize(buffer_size); - Helpers::secure_memset(output_buffer.data(), 0.f, buffer_size, buffer_size); - } - - stbi_image_free((void*) image_data); + else + { + size_t total_pixel = width * height; + size_t buffer_size = total_pixel * channel; + output_buffer.resize(buffer_size); + Helpers::secure_memset(output_buffer.data(), 0.f, buffer_size, buffer_size); + } - Buffers::Bitmap in = {width, height, 4, Buffers::BitmapFormat::FLOAT, output_buffer.data()}; - Buffers::Bitmap vertical_cross = Buffers::Bitmap::EquirectangularMapToVerticalCross(in); - Buffers::Bitmap cubemap = Buffers::Bitmap::VerticalCrossToCubemap(vertical_cross); + stbi_image_free((void*) image_data); - // spec.Width = cubemap.Width; - // spec.Height = cubemap.Height; - size_t buffer_size = cubemap.Buffer.size(); - size_t buffer_byte = buffer_size * sizeof(uint8_t); - upload_req.Buffer.resize(buffer_size); - Helpers::secure_memmove(upload_req.Buffer.data(), buffer_byte, cubemap.Buffer.data(), buffer_byte); - } - else - { + Buffers::Bitmap in = {width, height, 4, Buffers::BitmapFormat::FLOAT, output_buffer.data()}; + Buffers::Bitmap vertical_cross = Buffers::Bitmap::EquirectangularMapToVerticalCross(in); + Buffers::Bitmap cubemap = Buffers::Bitmap::VerticalCrossToCubemap(vertical_cross); - stbi_uc* image_data = stbi_load(file_request.Filename.data(), &width, &height, &channel, STBI_rgb_alpha); - if (!image_data) - { - ZENGINE_CORE_ERROR("Failed to load texture file : {0}", file_request.Filename.data()) - continue; + // spec.Width = cubemap.Width; + // spec.Height = cubemap.Height; + size_t buffer_size = cubemap.Buffer.size(); + size_t buffer_byte = buffer_size * sizeof(uint8_t); + 
upload_req.Buffer.resize(buffer_size); + Helpers::secure_memmove(upload_req.Buffer.data(), buffer_byte, cubemap.Buffer.data(), buffer_byte); } + else + { - bool perform_convert_rgb_to_rgba = (channel <= STBI_rgb); + stbi_uc* image_data = stbi_load(file_request.Filename.data(), &width, &height, &channel, STBI_rgb_alpha); + if (!image_data) + { + ZENGINE_CORE_ERROR("Failed to load texture file : {0}", file_request.Filename.data()) + continue; + } - if (perform_convert_rgb_to_rgba) - { - size_t total_pixel = width * height; - size_t buffer_size = total_pixel * 4; - upload_req.Buffer.resize(buffer_size); - stbir_resize_uint8(image_data, width, height, 0, upload_req.Buffer.data(), width, height, 0, 4); + bool perform_convert_rgb_to_rgba = (channel <= STBI_rgb); - for (int i = 0; i < total_pixel; ++i) + if (perform_convert_rgb_to_rgba) { - int offset = i * 4; // RGBA format (4 channels) + size_t total_pixel = width * height; + size_t buffer_size = total_pixel * 4; + upload_req.Buffer.resize(buffer_size); + stbir_resize_uint8(image_data, width, height, 0, upload_req.Buffer.data(), width, height, 0, 4); - if (channel == 1) - { - upload_req.Buffer[offset + 3] = 255; - } - else if (channel == 2) - { - upload_req.Buffer[offset + 3] = image_data[i * 2 + 1]; - } - else if (channel == 3) + for (int i = 0; i < total_pixel; ++i) { - upload_req.Buffer[offset + 3] = 255; + int offset = i * 4; // RGBA format (4 channels) + + if (channel == 1) + { + upload_req.Buffer[offset + 3] = 255; + } + else if (channel == 2) + { + upload_req.Buffer[offset + 3] = image_data[i * 2 + 1]; + } + else if (channel == 3) + { + upload_req.Buffer[offset + 3] = 255; + } } } - } - else - { - size_t total_pixel = width * height; - size_t buffer_size = total_pixel * channel; - upload_req.Buffer.resize(buffer_size, 0); - Helpers::secure_memmove(upload_req.Buffer.data(), buffer_size, image_data, buffer_size); - } + else + { + size_t total_pixel = width * height; + size_t buffer_size = total_pixel * channel; + 
upload_req.Buffer.resize(buffer_size, 0); + Helpers::secure_memmove(upload_req.Buffer.data(), buffer_size, image_data, buffer_size); + } - stbi_image_free(image_data); - } + stbi_image_free(image_data); + } - upload_req.BufferSize = (upload_req.Buffer.size() * sizeof(uint8_t)); - upload_req.Handle = file_request.Handle; - upload_req.TextureSpec = file_request.TextureSpec; + upload_req.BufferSize = (upload_req.Buffer.size() * sizeof(uint8_t)); + upload_req.Handle = file_request.Handle; + upload_req.TextureSpec = file_request.TextureSpec; + upload_req.FrameIdx = file_request.FrameIdx; + upload_req.ThreadIdx = file_request.ThreadIdx; - m_upload_requests.Emplace(std::move(upload_req)); + m_upload_requests.Emplace(std::move(upload_req)); + } } } } void AsyncResourceLoader::Shutdown() { + m_cancellation_token.store(true, std::memory_order_release); + // We are safe to call destructor to clean up semaphore resources + // Timeline->~Semaphore(); + } + + void AsyncResourceLoader::Reset() + { + auto total_thread_count = Device->CommandBufferMgr->TotalThreadCount; + auto frame_count = Device->SwapchainPtr->BufferredFrameCount; + + for (uint32_t f = 0; f < frame_count; ++f) { - std::unique_lock l(m_mutex); - m_cancellation_token = true; - } - m_cond.notify_one(); + for (uint32_t t = 0; t < total_thread_count; ++t) + { + ResetCommandBuffers(f, t); + + uint32_t pool_index = (f * Device->CommandBufferMgr->TotalThreadCount) + t; - BufferManager->Deinitialize(); + uint64_t grahic_value = 0; + vkGetSemaphoreCounterValue(Device->LogicalDevice, Timelines[pool_index]->GetHandle(), &grahic_value); + NextValues[pool_index].store(grahic_value + 1, std::memory_order_release); + + if (Device->HasSeperateTransfertQueueFamily) + { + uint64_t transfer_value = 0; + vkGetSemaphoreCounterValue(Device->LogicalDevice, TransferTimelines[pool_index]->GetHandle(), &transfer_value); + TransferNextValues[pool_index].store(transfer_value + 1, std::memory_order_release); + } + } + } } } // namespace 
ZEngine::Hardwares diff --git a/ZEngine/ZEngine/Hardwares/AsyncResourceLoader.h b/ZEngine/ZEngine/Hardwares/AsyncResourceLoader.h index 0a3b21e2..3833731b 100644 --- a/ZEngine/ZEngine/Hardwares/AsyncResourceLoader.h +++ b/ZEngine/ZEngine/Hardwares/AsyncResourceLoader.h @@ -1,28 +1,30 @@ #pragma once #include +#include #include #include +#include namespace ZEngine::Hardwares { struct VulkanDevice; - struct CommandBufferManager; - - struct UpdateTextureRequest - { - Rendering::Textures::TextureHandle Handle; - Rendering::Textures::Texture* Texture; - }; + struct AsyncGPUOperation; + struct BufferView; + struct CommandBuffer; struct TextureFileRequest { std::string Filename; Rendering::Textures::TextureHandle Handle; Rendering::Specifications::TextureSpecification TextureSpec; + uint8_t FrameIdx = 0; + uint8_t ThreadIdx = 0; }; struct TextureUploadRequest { + uint8_t FrameIdx = 0; + uint8_t ThreadIdx = 0; size_t BufferSize = 0; Rendering::Textures::TextureHandle Handle = {}; Rendering::Specifications::TextureSpecification TextureSpec = {}; @@ -31,21 +33,92 @@ namespace ZEngine::Hardwares struct AsyncResourceLoader { - VulkanDevice* Device = nullptr; - Hardwares::CommandBufferManager* BufferManager = nullptr; + enum class UploadType : uint8_t + { + TEXTURE_BUFFER = 0, + TEXTURE_BUFFER_LARGE, + TEXTURE_FILE, + BUFFER, + STAGING_BUFFER, + BUFFER_CLEAR, + }; + + struct UploadRequest + { + UploadType Type; + union + { + struct + { + unsigned char* Data = nullptr; + cstring Filename = nullptr; + Rendering::Textures::TextureHandle TexHandle = {}; + } TextureUpload; + struct + { + BufferView* const Buffer = nullptr; + const void* Data = nullptr; + uint32_t Offset = 0; + uint32_t ClearValue = 0; + size_t ByteSize = 0; + } BufferUpload; + }; + }; + + struct TimelineJob + { + CommandBuffer* Buffer = nullptr; + Rendering::Primitives::Semaphore* Timeline = nullptr; + Rendering::Primitives::Semaphore* WaitTimeline = nullptr; + uint32_t WaitFlag = 0; + uint64_t SignalValue = 0; + 
uint64_t WaitValue = UINT64_MAX; + }; + + struct DeferralUpload + { + UploadType Type; + uint8_t FrameIdx = 0; + uint8_t ThreadIdx = 0; + std::variant> Buffer; + Rendering::Textures::TextureHandle TexHandle = {}; + }; + + VulkanDevice* Device = nullptr; + uint32_t TotalCommandBufferCount = 0; + Core::Containers::Array NextValues = {}; + Core::Containers::Array TransferNextValues = {}; + Core::Containers::Array Timelines = {}; + Core::Containers::Array TransferTimelines = {}; + Core::Containers::Array> RetireValues = {}; + Core::Containers::Array> TransferRetireValues = {}; + Helpers::ThreadSafeQueue AsyncTimelineJobQueue = {}; + Helpers::ThreadSafeQueue DeferralUploadQueue = {}; + + void Initialize(VulkanDevice* device); + + void UploadTextureBuffer(uint8_t frame_index, uint8_t thread_index, const Rendering::Textures::TextureHandle& handle, unsigned char* data); + void UploadBuffer(uint8_t frame_index, uint8_t thread_index, BufferView* const buffer, const void* data, uint32_t offset, size_t byte_size); + void UploadFromStagingBuffer(uint8_t frame_index, uint8_t thread_index, BufferView* const destination, const void* data, uint32_t offset, size_t byte_size); + void ClearBuffer(uint8_t frame_index, uint8_t thread_index, BufferView* const buffer, uint32_t offset, size_t byte_size, uint32_t clear_value); + Rendering::Textures::TextureHandle LoadTextureFile(cstring filename) = delete; + + void Submit(UploadType type, uint8_t frame_index, uint8_t thread_index, const UploadRequest& request); + void SubmitDeferral(DeferralUpload&& deferral); + Rendering::Textures::TextureHandle Submit(uint8_t frame_index, uint8_t thread_index, const UploadRequest& request); - void Initialize(VulkanDevice* renderer); - void Run(); - void Shutdown(); + void CompleteDeferrals(); + void SubmitAsyncJobs(); + void ResetCommandBuffers(uint8_t frame_index, uint8_t thread_index); + void ClearAsyncJobs(); - Rendering::Textures::TextureHandle LoadTextureFile(cstring filename); + void Run(); + void 
Shutdown(); + void Reset(); private: std::atomic_bool m_cancellation_token{false}; std::mutex m_mutex; - std::mutex m_mutex_2; - std::condition_variable m_cond; - Helpers::ThreadSafeQueue m_update_texture_request; Helpers::ThreadSafeQueue m_file_requests; Helpers::ThreadSafeQueue m_upload_requests; }; diff --git a/ZEngine/ZEngine/Hardwares/DeviceSwapchain.cpp b/ZEngine/ZEngine/Hardwares/DeviceSwapchain.cpp new file mode 100644 index 00000000..17cc4596 --- /dev/null +++ b/ZEngine/ZEngine/Hardwares/DeviceSwapchain.cpp @@ -0,0 +1,509 @@ +#include +#include +#include +#include +#include +#include + +using namespace ZEngine::Core::Containers; +using namespace ZEngine::Rendering; +using namespace ZEngine::Rendering::Renderers; +using namespace ZEngine::Rendering::Specifications; + +namespace ZEngine::Hardwares +{ + void DeviceSwapchain::Initialize(VulkanDevice* const device, uint32_t buffered_frame_size) + { + device->Arena->CreateSubArena(ZMega(3), &Arena); + + Device = device; + + BufferredFrameCount = buffered_frame_size; + FrameContextPoolSize = BufferredFrameCount * FrameContextPoolSizeFactor; + + RenderTimeline = ZPushStructCtorArgs(&Arena, Primitives::Semaphore, Device, true); + + Specifications::AttachmentSpecification attachment_specification = {.BindPoint = Specifications::PipelineBindPoint::GRAPHIC}; + attachment_specification.ColorsMap.init(&Arena, 2); + attachment_specification.ColorsMap[0] = {}; + attachment_specification.ColorsMap[0].Format = ImageFormat::FORMAT_FROM_DEVICE; + attachment_specification.ColorsMap[0].Load = LoadOperation::CLEAR; + attachment_specification.ColorsMap[0].Store = StoreOperation::STORE; + attachment_specification.ColorsMap[0].Initial = ImageLayout::UNDEFINED; + attachment_specification.ColorsMap[0].Final = ImageLayout::PRESENT_SRC; + attachment_specification.ColorsMap[0].ReferenceLayout = ImageLayout::COLOR_ATTACHMENT_OPTIMAL; + SwapchainAttachment = ZPushStructCtorArgs(&Arena, RenderPasses::Attachment, Device, 
attachment_specification); + + IdleFrameThreshold.store(BufferredFrameCount * 3 * 3 * 3, std::memory_order_release); + FrameContexts.init(&Arena, FrameContextPoolSize, FrameContextPoolSize); + + for (uint32_t i = 0; i < FrameContextPoolSize; ++i) + { + auto& frame = FrameContexts[i]; + + frame.Index = (i % BufferredFrameCount); + frame.Acquired = ZPushStructCtorArgs(&Arena, Primitives::Semaphore, Device); + frame.Fence = ZPushStructCtorArgs(&Arena, Primitives::Fence, Device, true); + } + + Create(); + + ImageInFlights.init(&Arena, SwapchainImageCount, SwapchainImageCount); + RenderCompletes.init(&Arena, SwapchainImageCount, SwapchainImageCount); + PresentCompletes.init(&Arena, SwapchainImageCount, SwapchainImageCount); + for (uint32_t i = 0; i < SwapchainImageCount; ++i) + { + ImageInFlights[i] = nullptr; + RenderCompletes[i] = ZPushStructCtorArgs(&Arena, Primitives::Semaphore, Device); + PresentCompletes[i] = ZPushStructCtorArgs(&Arena, Primitives::Fence, Device); + } + } + + void DeviceSwapchain::Create() + { + VkSurfaceCapabilitiesKHR capabilities{}; + vkGetPhysicalDeviceSurfaceCapabilitiesKHR(Device->PhysicalDevice, Device->Surface, &capabilities); + if (capabilities.currentExtent.width != std::numeric_limits::max()) + { + SwapchainImageWidth = capabilities.currentExtent.width; + SwapchainImageHeight = capabilities.currentExtent.height; + } + + VkSwapchainKHR old_swapchain = (SwapchainHandle != VK_NULL_HANDLE) ? SwapchainHandle : VK_NULL_HANDLE; + auto min_image_count = std::clamp(capabilities.minImageCount, capabilities.minImageCount, capabilities.maxImageCount == 0 ? 
capabilities.minImageCount + 1 : capabilities.maxImageCount); + VkSwapchainCreateInfoKHR swapchain_create_info = { + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + .pNext = nullptr, + .surface = Device->Surface, + .minImageCount = min_image_count, + .imageFormat = Device->SurfaceFormat.format, + .imageColorSpace = Device->SurfaceFormat.colorSpace, + .imageExtent = VkExtent2D{.width = SwapchainImageWidth, .height = SwapchainImageHeight}, + .imageArrayLayers = 1, + .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + .preTransform = capabilities.currentTransform, + .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + .presentMode = Device->PresentMode, + .clipped = VK_TRUE, + .oldSwapchain = old_swapchain, + }; + + auto scratch = ZGetScratch(&Arena); + + Array family_indice = {}; + uint32_t family_indice_count = Device->HasSeperateTransfertQueueFamily ? 2 : 1; + family_indice.init(scratch.Arena, family_indice_count, family_indice_count); + family_indice[0] = Device->GraphicFamilyIndex; + if (Device->HasSeperateTransfertQueueFamily) + { + family_indice[1] = Device->TransferFamilyIndex; + } + swapchain_create_info.imageSharingMode = Device->HasSeperateTransfertQueueFamily ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE; + swapchain_create_info.queueFamilyIndexCount = Device->HasSeperateTransfertQueueFamily ? 
2 : 1; + swapchain_create_info.pQueueFamilyIndices = family_indice.data(); + + ZENGINE_VALIDATE_ASSERT(vkCreateSwapchainKHR(Device->LogicalDevice, &swapchain_create_info, nullptr, &SwapchainHandle) == VK_SUCCESS, "Failed to create Swapchain") + ZENGINE_VALIDATE_ASSERT(vkGetSwapchainImagesKHR(Device->LogicalDevice, SwapchainHandle, &SwapchainImageCount, nullptr) == VK_SUCCESS, "Failed to get Images count from Swapchain") + ZReleaseScratch(scratch); + + if (SwapchainImageViews.capacity() <= 0) + { + SwapchainImageViews.init(&Arena, SwapchainImageCount, SwapchainImageCount); + } + + if (SwapchainFramebuffers.capacity() <= 0) + { + SwapchainFramebuffers.init(&Arena, SwapchainImageCount, SwapchainImageCount); + } + + scratch = ZGetScratch(&Arena); + + Array swapchain_images = {}; + swapchain_images.init(scratch.Arena, SwapchainImageCount, SwapchainImageCount); + ZENGINE_VALIDATE_ASSERT(vkGetSwapchainImagesKHR(Device->LogicalDevice, SwapchainHandle, &SwapchainImageCount, swapchain_images.data()) == VK_SUCCESS, "Failed to get VkImages from Swapchain") + for (int i = 0; i < SwapchainImageCount; ++i) + { + SwapchainImageViews[i] = Device->CreateImageView(swapchain_images[i], Device->SurfaceFormat.format, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_ASPECT_COLOR_BIT); + + Array fb_images_views; + fb_images_views.init(scratch.Arena, 1); + fb_images_views.push(SwapchainImageViews[i]); + SwapchainFramebuffers[i] = Device->CreateFramebuffer(ArrayView{fb_images_views}, SwapchainAttachment->GetHandle(), SwapchainImageWidth, SwapchainImageHeight); + } + + ZReleaseScratch(scratch); + + if (old_swapchain != VK_NULL_HANDLE) + { + ZENGINE_DESTROY_VULKAN_HANDLE(Device->LogicalDevice, vkDestroySwapchainKHR, old_swapchain, nullptr) + } + } + + void DeviceSwapchain::Clear() + { + for (uint32_t i = 0; i < SwapchainImageCount; ++i) + { + Device->EnqueueForDeletion(DeviceResourceType::IMAGEVIEW, SwapchainImageViews[i]); + Device->EnqueueForDeletion(DeviceResourceType::FRAMEBUFFER, 
SwapchainFramebuffers[i]); + SwapchainImageViews[i] = VK_NULL_HANDLE; + SwapchainFramebuffers[i] = VK_NULL_HANDLE; + } + + for (uint32_t i = 0; i < SwapchainImageCount; ++i) + { + ImageInFlights[i] = nullptr; + } + // We don't call .clear() because we want to reuse the allocated space + // SwapchainImageViews.clear(); + // SwapchainFramebuffers.clear(); + } + + void DeviceSwapchain::Dispose() + { + Clear(); + ZENGINE_DESTROY_VULKAN_HANDLE(Device->LogicalDevice, vkDestroySwapchainKHR, SwapchainHandle, nullptr) + SwapchainAttachment->Dispose(); + } + + void DeviceSwapchain::AcquireNextImage(uint32_t frame_context_idx) + { + if (HasRecreationPending) + { + /* + * On macOS: + * Because of ASYNCHRONOUS communication between MoltenVK and Metal: + * 1. vkDeviceWaitIdle() only guarantees Vulkan sees GPU idle + * 2. Metal's CAMetalLayer may still have pending present operations + * 3. Semaphores can appear "stuck" in Submitted state due to Metal async completion + * + * Pool of BufferredFrameCount * 4 = 12 contexts rotates every resize. + * Skipping 3 ensures we land on fully Idle semaphores/fences even under Metal async. 
+ * + */ +#ifdef __APPLE__ + vkDeviceWaitIdle(Device->LogicalDevice); +#endif + for (uint32_t i = 0; i < ImageInFlights.size(); ++i) + { + if (ImageInFlights[i] != nullptr) + { + ImageInFlights[i]->Wait(UINT64_MAX); + } + } + + for (uint32_t i = 0; i < PresentCompletes.size(); ++i) + { + if (PresentCompletes[i]->GetState() == Rendering::Primitives::FenceState::Submitted) + { + PresentCompletes[i]->Wait(UINT64_MAX); + PresentCompletes[i]->Reset(); + } + } + + for (int i = 0; i < FrameContextPoolSizeFactor; ++i) + { + FrameContext& frame = FrameContexts[i + FrameContextOffset]; + frame.Acquired->SetState(Primitives::SemaphoreState::Idle); + } + + Device->AsyncResLoader->ClearAsyncJobs(); + Device->AsyncResLoader->Reset(); + + uint64_t timeline_value = 0; + vkGetSemaphoreCounterValue(Device->LogicalDevice, RenderTimeline->GetHandle(), &timeline_value); + + ZENGINE_VALIDATE_ASSERT(timeline_value >= RenderTimelineNextValue, "Render Timeline value is behind the last submitted value, this should never happen.") + ZENGINE_VALIDATE_ASSERT(timeline_value < UINT64_MAX, "Render Timeline value is corrupted, this should never happen.") + RenderTimelineNextValue = timeline_value; + + FrameContextOffset = (FrameContextOffset + FrameContextPoolSizeFactor) % FrameContextPoolSize; + // CurrentFrame = nullptr; + + Clear(); + Create(); + + HasRecreationPending = false; + ZENGINE_CORE_WARN("Swapchain has been re-created") + } + + FrameContext& frame = FrameContexts[frame_context_idx + FrameContextOffset]; + if (frame.Fence->GetState() == Rendering::Primitives::FenceState::Submitted) + { + frame.Fence->Wait(UINT64_MAX); + } + frame.Fence->Reset(); + frame.Acquired->SetState(Rendering::Primitives::SemaphoreState::Idle); + + uint32_t image_idx = 0; + VkResult acquire_image_result = vkAcquireNextImageKHR(Device->LogicalDevice, SwapchainHandle, UINT64_MAX, frame.Acquired->GetHandle(), VK_NULL_HANDLE, &(image_idx)); + frame.Acquired->SetState(Primitives::SemaphoreState::Submitted); + + if 
(PresentCompletes[image_idx]->GetState() == Rendering::Primitives::FenceState::Submitted) + { + PresentCompletes[image_idx]->Wait(UINT64_MAX); + PresentCompletes[image_idx]->Reset(); + } + + if (ImageInFlights[image_idx] != nullptr) + { + if (!(ImageInFlights[image_idx])->IsSignaled()) + { + ImageInFlights[image_idx]->Wait(UINT64_MAX); + } + } + RenderCompletes[image_idx]->SetState(Rendering::Primitives::SemaphoreState::Idle); + + ImageInFlights[image_idx] = frame.Fence; + frame.ImageIndex = image_idx; + CurrentFrame = &frame; + + if (acquire_image_result == VK_SUBOPTIMAL_KHR || acquire_image_result == VK_ERROR_OUT_OF_DATE_KHR) + { + ImageInFlights[frame.ImageIndex] = nullptr; + HasRecreationPending = true; + } + } + + void DeviceSwapchain::Present() + { + if (HasRecreationPending) + { + IdleFrameCount.fetch_add(1); + Device->CommandBufferMgr->ResetEnqueuedBufferIndex(); + return; + } + + { + Textures::TextureHandle tex_handle = {}; + while (Device->TextureHandleToUpdates.Pop(tex_handle)) + { + auto texture = Device->GlobalTextures.Access(tex_handle); + + if (!texture) + { + Device->TextureHandleToUpdates.Enqueue(tex_handle); + break; + } + auto img_buf = Device->Image2DBufferManager.Access(texture->BufferHandle); + const auto& image_info = img_buf->GetDescriptorImageInfo(); + + auto scratch = ZGetScratch(&Arena); + { + Array write_descriptor_sets = {}; + write_descriptor_sets.init(scratch.Arena, Device->WriteBindlessDescriptorSetRequests.size()); + + for (auto& req : Device->WriteBindlessDescriptorSetRequests) + { + write_descriptor_sets.push( + VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = req.DstSet, + .dstBinding = req.Binding, + .dstArrayElement = (uint32_t) tex_handle.Index, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = &(image_info), + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }); + } + vkUpdateDescriptorSets(Device->LogicalDevice, 
(uint32_t) write_descriptor_sets.size(), write_descriptor_sets.data(), 0, nullptr); + } + ZReleaseScratch(scratch); + } + } + + { + Textures::TextureHandle tex_to_dispose = {}; + while (Device->TextureHandleToDispose.Pop(tex_to_dispose)) + { + auto texture = Device->GlobalTextures.Access(tex_to_dispose); + if (texture) + { + texture->Dispose(); + Device->GlobalTextures.Remove(tex_to_dispose); + } + } + } + + auto scratch = ZGetScratch(&Arena); + + Array buffer = {}; + buffer.init(scratch.Arena, Device->CommandBufferMgr->EnqueuedCommandBufferIndex, Device->CommandBufferMgr->EnqueuedCommandBufferIndex); + for (int i = 0; i < buffer.size(); ++i) + { + buffer[i] = Device->CommandBufferMgr->EnqueuedCommandBuffers[i]->GetHandle(); + } + + auto render_complete = RenderCompletes[CurrentFrame->ImageIndex]; + auto present_complete = PresentCompletes[CurrentFrame->ImageIndex]; + + ZENGINE_VALIDATE_ASSERT(render_complete->GetState() != Rendering::Primitives::SemaphoreState::Submitted, "Signal semaphore is already in a signaled state.") + ZENGINE_VALIDATE_ASSERT(CurrentFrame->Fence->GetState() != Rendering::Primitives::FenceState::Submitted, "Signal fence is already in a signaled state.") + + QueueView queue = Device->GetQueue(Rendering::QueueType::GRAPHIC_QUEUE); + + // for the rendering and presentation, we use the 3-submit pattern + // This is due to an Intel driver bug that doesn't handle the combination of Timeline + Binary Semaphores well.
+ // + // 1 - Acquire bridge + // 2 - Rendering work + // 3 - Present bridge + + // 1- Binary Acquire to a Timeline value + uint64_t frame_start_value = ++RenderTimelineNextValue; + uint64_t ignored_wait_val = 0; + VkTimelineSemaphoreSubmitInfo timeline_info0 = { + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + .waitSemaphoreValueCount = 1, // must match waitSemaphoreCount + .pWaitSemaphoreValues = &ignored_wait_val, + .signalSemaphoreValueCount = 1, + .pSignalSemaphoreValues = &frame_start_value, + }; + + VkPipelineStageFlags acquire_wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkSemaphore acquire_wait_semaphores[] = {CurrentFrame->Acquired->GetHandle()}; + VkSemaphore acquire_signal_semaphores[] = {RenderTimeline->GetHandle()}; + VkSubmitInfo submit_0 = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_info0, + .waitSemaphoreCount = 1, + .pWaitSemaphores = acquire_wait_semaphores, + .pWaitDstStageMask = &acquire_wait_stage, + .commandBufferCount = 0, + .signalSemaphoreCount = 1, + .pSignalSemaphores = acquire_signal_semaphores, + }; + VkResult r0 = vkQueueSubmit(queue.Handle, 1, &submit_0, VK_NULL_HANDLE); + ZENGINE_VALIDATE_ASSERT(r0 == VK_SUCCESS, "Failed to submit acquire bridge") + + struct TimelineAggregate + { + uint64_t MaxValue = 0; + VkPipelineStageFlags StageMask = 0; + }; + + Array wait_semaphores = {}; + Array wait_values = {}; + Array stage_flags = {}; + HashMap max_val_timeline_semaphores = {}; + + wait_semaphores.init(scratch.Arena, 10); + stage_flags.init(scratch.Arena, 10); + wait_values.init(scratch.Arena, 10); + max_val_timeline_semaphores.init(scratch.Arena); + + wait_semaphores.push(RenderTimeline->GetHandle()); + wait_values.push(frame_start_value); + stage_flags.push(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); + + { + Hardwares::AsyncGPUOperationHandle op; + while (Device->AsyncGPUOperations.Pop(op)) + { + if (!max_val_timeline_semaphores.contains(op.Timeline)) + { + 
max_val_timeline_semaphores.insert(op.Timeline, {op.SignalValue, op.StageFlags}); + continue; + } + auto& val = max_val_timeline_semaphores[op.Timeline]; + val.MaxValue = std::max(val.MaxValue, op.SignalValue); + val.StageMask |= op.StageFlags; + } + } + + for (auto [sem, val] : max_val_timeline_semaphores) + { + wait_semaphores.push(sem->GetHandle()); + wait_values.push(val.MaxValue); + stage_flags.push(val.StageMask); + } + + uint64_t work_complete_value = ++RenderTimelineNextValue; + VkSemaphore work_signal_semaphores[] = {RenderTimeline->GetHandle()}; + VkTimelineSemaphoreSubmitInfo timeline_info_1 = { + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + .waitSemaphoreValueCount = (uint32_t) wait_values.size(), + .pWaitSemaphoreValues = wait_values.data(), + .signalSemaphoreValueCount = 1, + .pSignalSemaphoreValues = &work_complete_value, + }; + + VkSubmitInfo submit_info_1 = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_info_1, + .waitSemaphoreCount = (uint32_t) wait_semaphores.size(), + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = stage_flags.data(), + .commandBufferCount = (uint32_t) buffer.size(), + .pCommandBuffers = buffer.data(), + .signalSemaphoreCount = 1, + .pSignalSemaphores = work_signal_semaphores, + }; + + auto submit = vkQueueSubmit(queue.Handle, 1, &(submit_info_1), CurrentFrame->Fence->GetHandle()); + ZENGINE_VALIDATE_ASSERT(submit == VK_SUCCESS, "Failed to submit queue") + + ZReleaseScratch(scratch); + + Device->CommandBufferMgr->ResetEnqueuedBufferIndex(); + CurrentFrame->Fence->SetState(Rendering::Primitives::FenceState::Submitted); + + uint64_t dummy_signal_val = 0; + VkPipelineStageFlags present_wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkSemaphore present_wait_semaphores[] = {RenderTimeline->GetHandle()}; + VkSemaphore present_signal_semaphores[] = {render_complete->GetHandle()}; + VkTimelineSemaphoreSubmitInfo timeline_info2 = { + .sType = 
VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &work_complete_value, + .signalSemaphoreValueCount = 1, + .pSignalSemaphoreValues = &dummy_signal_val, + }; + + VkSubmitInfo submit2 = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_info2, + .waitSemaphoreCount = 1, + .pWaitSemaphores = present_wait_semaphores, + .pWaitDstStageMask = &present_wait_stage, + .commandBufferCount = 0, + .signalSemaphoreCount = 1, + .pSignalSemaphores = present_signal_semaphores, + }; + + VkResult r2 = vkQueueSubmit(queue.Handle, 1, &submit2, present_complete->GetHandle()); + ZENGINE_VALIDATE_ASSERT(r2 == VK_SUCCESS, "Failed to submit present bridge") + + render_complete->SetState(Rendering::Primitives::SemaphoreState::Submitted); + present_complete->SetState(Rendering::Primitives::FenceState::Submitted); + + VkSwapchainKHR swapchains[] = {SwapchainHandle}; + uint32_t frames[] = {CurrentFrame->ImageIndex}; + VkSemaphore semaphores[] = {render_complete->GetHandle()}; + VkPresentInfoKHR present_info = { + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreCount = 1, + .pWaitSemaphores = semaphores, + .swapchainCount = 1, + .pSwapchains = swapchains, + .pImageIndices = frames, + }; + VkResult present_result = vkQueuePresentKHR(queue.Handle, &present_info); + + IdleFrameCount.fetch_add(1); + + if (present_result == VK_ERROR_OUT_OF_DATE_KHR || present_result == VK_SUBOPTIMAL_KHR) + { + HasRecreationPending = true; + if (present_result == VK_ERROR_OUT_OF_DATE_KHR) + { + return; + } + } + + ZENGINE_VALIDATE_ASSERT(present_result == VK_SUCCESS || present_result == VK_SUBOPTIMAL_KHR, "Failed to present current frame on Window") + } +} // namespace ZEngine::Hardwares diff --git a/ZEngine/ZEngine/Hardwares/DeviceSwapchain.h b/ZEngine/ZEngine/Hardwares/DeviceSwapchain.h new file mode 100644 index 00000000..65e4dcca --- /dev/null +++ b/ZEngine/ZEngine/Hardwares/DeviceSwapchain.h @@ -0,0 +1,63 @@ 
+#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ZEngine::Hardwares +{ + struct VulkanDevice; + + struct FrameContext + { + uint32_t Index = std::numeric_limits::max(); + uint32_t ImageIndex = std::numeric_limits::max(); + Rendering::Primitives::Semaphore* Acquired = nullptr; + Rendering::Primitives::Fence* Fence = nullptr; + }; + ZDEFINE_PTR(FrameContext); + + struct DeviceSwapchain + { + Core::Memory::ArenaAllocator Arena = {}; + VulkanDevice* Device = nullptr; + bool HasRecreationPending = false; + uint32_t BufferredFrameCount = 0; + uint32_t SwapchainImageCount = 3; + uint32_t PreviousSwapchainImageCount = 3; + uint32_t SwapchainImageCountChangeCount = 0; + + uint32_t SwapchainImageWidth = std::numeric_limits::max(); + uint32_t SwapchainImageHeight = std::numeric_limits::max(); + uint32_t FrameContextOffset = 0; + uint32_t FrameContextPoolSize = 0; + const uint32_t FrameContextPoolSizeFactor = 4; + // Todo Convert atomic_uint as PaddedAtomic.. 
+ std::atomic_uint IdleFrameCount = 0; + std::atomic_uint IdleFrameThreshold = std::numeric_limits::max(); + uint64_t RenderTimelineNextValue = 0; + VkSwapchainKHR SwapchainHandle = VK_NULL_HANDLE; + FrameContextPtr CurrentFrame = nullptr; + Rendering::Primitives::Semaphore* RenderTimeline = nullptr; + Rendering::Renderers::RenderPasses::Attachment* SwapchainAttachment = nullptr; + Core::Containers::Array FrameContexts = {}; + Core::Containers::Array SwapchainImageViews = {}; + Core::Containers::Array SwapchainFramebuffers = {}; + Core::Containers::Array ImageInFlights = {}; + Core::Containers::Array PresentCompletes = {}; + Core::Containers::Array RenderCompletes = {}; + + void Initialize(VulkanDevice* const device, uint32_t buffered_frame_size); + void Create(); + void Clear(); + void Dispose(); + + void AcquireNextImage(uint32_t frame_context_idx); + void Present(); + }; + ZDEFINE_PTR(DeviceSwapchain); +} // namespace ZEngine::Hardwares diff --git a/ZEngine/ZEngine/Hardwares/VulkanDevice.cpp b/ZEngine/ZEngine/Hardwares/VulkanDevice.cpp index ef44de8f..a0deb584 100644 --- a/ZEngine/ZEngine/Hardwares/VulkanDevice.cpp +++ b/ZEngine/ZEngine/Hardwares/VulkanDevice.cpp @@ -1,5 +1,3 @@ -#include - /* * We define those Macros before inclusion of VulkanDevice.h so we can enable impl from VMA header */ @@ -11,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -25,12 +24,22 @@ using namespace ZEngine::Core::Containers; namespace ZEngine::Hardwares { - void VulkanDevice::Initialize(ZEngine::Core::Memory::ArenaAllocator* arena, Windows::CoreWindow* const window) + void AsyncGPUOperation::Initialize(VulkanDevice* device, uint32_t total_buffer_count) + { + NextValue = 0; + Timeline = ZPushStructCtorArgs(device->Arena, Semaphore, device, true); + RetireValues.init(device->Arena, total_buffer_count, total_buffer_count); + } + + void VulkanDevice::Initialize(ZEngine::Core::Memory::ArenaAllocator* arena, Windows::CoreWindow* const window, uint32_t 
worker_thread_count) { Arena = arena; CurrentWindow = window; + WorkerThreadCount = worker_thread_count; ShaderReservedBindingSets = {1}; // Todo: we should introduce HashSet<> AsyncResLoader = ZPushStructCtor(Arena, AsyncResourceLoader); + CommandBufferMgr = ZPushStructCtor(Arena, CommandBufferManager); + SwapchainPtr = ZPushStructCtor(Arena, DeviceSwapchain); DefaultDepthFormats.init(Arena, 3); DefaultDepthFormats.push(VK_FORMAT_D32_SFLOAT); @@ -182,33 +191,33 @@ namespace ZEngine::Hardwares for (VkPhysicalDevice physical_device : physical_device_collection) { - VkPhysicalDeviceDescriptorIndexingProperties indexing_properties = {}; - indexing_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES; + VkPhysicalDeviceVulkan12Properties vulkan_1_2_properties = {}; + vulkan_1_2_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES; + + VkPhysicalDeviceProperties2 physical_device_properties = {}; + physical_device_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + physical_device_properties.pNext = &vulkan_1_2_properties; - VkPhysicalDeviceProperties physical_device_properties; - VkPhysicalDeviceProperties2 physical_device_properties2 = {}; - physical_device_properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - physical_device_properties2.pNext = &indexing_properties; + vkGetPhysicalDeviceProperties2(physical_device, &physical_device_properties); - vkGetPhysicalDeviceProperties(physical_device, &physical_device_properties); - vkGetPhysicalDeviceProperties2(physical_device, &physical_device_properties2); + VkPhysicalDeviceVulkan12Features vulkan_1_2_features = {}; + vulkan_1_2_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; - VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_features = {}; - descriptor_indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES; - VkPhysicalDeviceFeatures2 physical_device_feature = {}; - 
physical_device_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - physical_device_feature.pNext = &descriptor_indexing_features; + VkPhysicalDeviceFeatures2 physical_device_feature = {}; + physical_device_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + physical_device_feature.pNext = &vulkan_1_2_features; vkGetPhysicalDeviceFeatures2(physical_device, &physical_device_feature); - if (/*(physical_device_feature.geometryShader == VK_TRUE) && (physical_device_feature.samplerAnisotropy == VK_TRUE) && */ ((physical_device_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) || (physical_device_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU))) + if (/*(physical_device_feature.geometryShader == VK_TRUE) && (physical_device_feature.samplerAnisotropy == VK_TRUE) && */ ((physical_device_properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) || (physical_device_properties.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU))) { PhysicalDevice = physical_device; PhysicalDeviceProperties = physical_device_properties; - PhysicalDeviceDescriptorIndexingProperties = indexing_properties; + PhysicalDeviceVulkan12Properties = vulkan_1_2_properties; PhysicalDeviceFeature = physical_device_feature; - PhysicalDeviceSupportSampledImageBindless = (descriptor_indexing_features.runtimeDescriptorArray == VK_TRUE && descriptor_indexing_features.descriptorBindingSampledImageUpdateAfterBind == VK_TRUE && descriptor_indexing_features.descriptorBindingPartiallyBound == VK_TRUE && descriptor_indexing_features.descriptorBindingUpdateUnusedWhilePending == VK_TRUE); - PhysicalDeviceSupportStorageBufferBindless = (descriptor_indexing_features.runtimeDescriptorArray == VK_TRUE && descriptor_indexing_features.descriptorBindingPartiallyBound == VK_TRUE); + PhysicalDeviceSupportSampledImageBindless = (vulkan_1_2_features.runtimeDescriptorArray == VK_TRUE && 
vulkan_1_2_features.descriptorBindingSampledImageUpdateAfterBind == VK_TRUE && vulkan_1_2_features.descriptorBindingPartiallyBound == VK_TRUE && vulkan_1_2_features.descriptorBindingUpdateUnusedWhilePending == VK_TRUE); + PhysicalDeviceSupportStorageBufferBindless = (vulkan_1_2_features.runtimeDescriptorArray == VK_TRUE && vulkan_1_2_features.descriptorBindingPartiallyBound == VK_TRUE); vkGetPhysicalDeviceMemoryProperties(PhysicalDevice, &PhysicalDeviceMemoryProperties); + PhysicalDeviceSupportTimelineSemaphore = (vulkan_1_2_features.timelineSemaphore == VK_TRUE); break; } } @@ -296,36 +305,39 @@ namespace ZEngine::Hardwares /* * Enabling some features */ - VkDeviceCreateInfo device_create_info = {}; - device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - device_create_info.queueCreateInfoCount = queue_create_info_collection.size(); - device_create_info.pQueueCreateInfos = queue_create_info_collection.data(); - device_create_info.enabledExtensionCount = static_cast(requested_device_extension_layer_name_collection.size()); - device_create_info.ppEnabledExtensionNames = (requested_device_extension_layer_name_collection.size() > 0) ? 
requested_device_extension_layer_name_collection.data() : nullptr; - device_create_info.pEnabledFeatures = nullptr; - - VkPhysicalDeviceDescriptorIndexingFeatures physical_device_descriptor_indexing_features = {}; - physical_device_descriptor_indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES; - VkPhysicalDeviceFeatures2 device_features_2 = {}; - device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - device_features_2.features.drawIndirectFirstInstance = PhysicalDeviceFeature.features.drawIndirectFirstInstance; - device_features_2.features.multiDrawIndirect = PhysicalDeviceFeature.features.multiDrawIndirect; - device_features_2.features.samplerAnisotropy = PhysicalDeviceFeature.features.samplerAnisotropy; - device_features_2.features.shaderInt64 = PhysicalDeviceFeature.features.shaderInt64; + VkDeviceCreateInfo device_create_info = {}; + device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device_create_info.queueCreateInfoCount = queue_create_info_collection.size(); + device_create_info.pQueueCreateInfos = queue_create_info_collection.data(); + device_create_info.enabledExtensionCount = static_cast(requested_device_extension_layer_name_collection.size()); + device_create_info.ppEnabledExtensionNames = (requested_device_extension_layer_name_collection.size() > 0) ? 
requested_device_extension_layer_name_collection.data() : nullptr; + + VkPhysicalDeviceVulkan12Features vulkan_1_2_features = {}; + vulkan_1_2_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + if (PhysicalDeviceSupportTimelineSemaphore) + { + vulkan_1_2_features.timelineSemaphore = VK_TRUE; + } + + VkPhysicalDeviceFeatures2 device_features_2 = {}; + device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + device_features_2.features.drawIndirectFirstInstance = PhysicalDeviceFeature.features.drawIndirectFirstInstance; + device_features_2.features.multiDrawIndirect = PhysicalDeviceFeature.features.multiDrawIndirect; + device_features_2.features.samplerAnisotropy = PhysicalDeviceFeature.features.samplerAnisotropy; if (PhysicalDeviceSupportSampledImageBindless || PhysicalDeviceSupportStorageBufferBindless) { if (PhysicalDeviceSupportSampledImageBindless) { - physical_device_descriptor_indexing_features.descriptorBindingUpdateUnusedWhilePending = VK_TRUE; - physical_device_descriptor_indexing_features.shaderSampledImageArrayNonUniformIndexing = VK_TRUE; - physical_device_descriptor_indexing_features.descriptorBindingSampledImageUpdateAfterBind = VK_TRUE; + vulkan_1_2_features.descriptorBindingUpdateUnusedWhilePending = VK_TRUE; + vulkan_1_2_features.shaderSampledImageArrayNonUniformIndexing = VK_TRUE; + vulkan_1_2_features.descriptorBindingSampledImageUpdateAfterBind = VK_TRUE; } - physical_device_descriptor_indexing_features.descriptorBindingPartiallyBound = VK_TRUE; - physical_device_descriptor_indexing_features.runtimeDescriptorArray = VK_TRUE; + vulkan_1_2_features.descriptorBindingPartiallyBound = VK_TRUE; + vulkan_1_2_features.runtimeDescriptorArray = VK_TRUE; - device_features_2.pNext = &physical_device_descriptor_indexing_features; + device_features_2.pNext = &vulkan_1_2_features; } device_create_info.pNext = &device_features_2; @@ -412,49 +424,53 @@ namespace ZEngine::Hardwares /* * Creating Swapchain */ - 
Specifications::AttachmentSpecification attachment_specification = {.BindPoint = Specifications::PipelineBindPoint::GRAPHIC}; - attachment_specification.ColorsMap.init(Arena, 2); - attachment_specification.ColorsMap[0] = {}; - attachment_specification.ColorsMap[0].Format = ImageFormat::FORMAT_FROM_DEVICE; - attachment_specification.ColorsMap[0].Load = LoadOperation::CLEAR; - attachment_specification.ColorsMap[0].Store = StoreOperation::STORE; - attachment_specification.ColorsMap[0].Initial = ImageLayout::UNDEFINED; - attachment_specification.ColorsMap[0].Final = ImageLayout::PRESENT_SRC; - attachment_specification.ColorsMap[0].ReferenceLayout = ImageLayout::COLOR_ATTACHMENT_OPTIMAL; - SwapchainAttachment = ZPushStructCtorArgs(Arena, Rendering::Renderers::RenderPasses::Attachment, this, attachment_specification); - PreviousFrameIndex = 0; - CurrentFrameIndex = 0; + // todo(jeanphilippekernel): Should pass MaxFrameCount instead of hard-coded 3 + SwapchainPtr->Initialize(this, 3); - CreateSwapchain(); - - SwapchainRenderCompleteSemaphores.init(Arena, SwapchainImageCount, SwapchainImageCount); - SwapchainAcquiredSemaphores.init(Arena, SwapchainImageCount, SwapchainImageCount); - SwapchainSignalFences.init(Arena, SwapchainImageCount, SwapchainImageCount); - - for (int i = 0; i < SwapchainImageCount; ++i) - { - SwapchainAcquiredSemaphores[i] = ZPushStructCtorArgs(Arena, Primitives::Semaphore, this); - SwapchainRenderCompleteSemaphores[i] = ZPushStructCtorArgs(Arena, Primitives::Semaphore, this); - SwapchainSignalFences[i] = ZPushStructCtorArgs(Arena, Primitives::Fence, this, true); - } + /* + * Creating Buffer Manager + */ + CommandBufferMgr->Initialize(this, SwapchainPtr->BufferredFrameCount); /* - * Creating Global Descriptor Pool for : Textures + * Creating Global Descriptor Pool for : Textures, Samplers */ - MaxGlobalTexture = std::min(MaxGlobalTexture, PhysicalDeviceDescriptorIndexingProperties.maxDescriptorSetUpdateAfterBindSamplers - 1); + VkSamplerCreateInfo 
sampler_create_info = {}; + sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + sampler_create_info.minFilter = VK_FILTER_LINEAR; + sampler_create_info.magFilter = VK_FILTER_LINEAR; + sampler_create_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_create_info.anisotropyEnable = PhysicalDeviceFeature.features.samplerAnisotropy; + sampler_create_info.maxAnisotropy = PhysicalDeviceFeature.features.samplerAnisotropy ? PhysicalDeviceProperties.properties.limits.maxSamplerAnisotropy : 1.0f; + sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + sampler_create_info.unnormalizedCoordinates = VK_FALSE; + sampler_create_info.compareEnable = VK_FALSE; + sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + sampler_create_info.mipLodBias = 0.0f; + sampler_create_info.minLod = 0.0f; + sampler_create_info.maxLod = VK_LOD_CLAMP_NONE; + + ZENGINE_VALIDATE_ASSERT(vkCreateSampler(LogicalDevice, &sampler_create_info, nullptr, &GlobalLinearWrapSampler) == VK_SUCCESS, "Failed to create Texture Sampler") + + GlobalLinearWrapSamplerImageInfo = VkDescriptorImageInfo{.sampler = GlobalLinearWrapSampler, .imageView = VK_NULL_HANDLE, .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED}; + MaxGlobalTexture = std::min(MaxGlobalTexture, PhysicalDeviceVulkan12Properties.maxPerStageDescriptorUpdateAfterBindSampledImages - 1); GlobalTextures.Initialize(Arena, MaxGlobalTexture); Image2DBufferManager.Initialize(Arena, MaxGlobalTexture); ShaderReservedLayoutBindingSpecificationMap.init(Arena, 1); - ShaderReservedLayoutBindingSpecificationMap[1].init(Arena, 1); - ShaderReservedLayoutBindingSpecificationMap[1].push(LayoutBindingSpecification{.Set = 1, .Binding = 0, .Count = MaxGlobalTexture, .Name = "TextureArray", .DescriptorTypeValue = DescriptorType::COMBINED_IMAGE_SAMPLER, .Flags = ShaderStageFlags::FRAGMENT}); + 
ShaderReservedLayoutBindingSpecificationMap[1].init(Arena, 2); + ShaderReservedLayoutBindingSpecificationMap[1].push(LayoutBindingSpecification{.Set = 1, .Binding = 0, .Count = MaxGlobalTexture, .Name = "TextureArray", .DescriptorTypeValue = DescriptorType::SAMPLED_IMAGE, .Flags = ShaderStageFlags::FRAGMENT}); + ShaderReservedLayoutBindingSpecificationMap[1].push(LayoutBindingSpecification{.Set = 1, .Binding = 1, .Count = 1, .Name = "LinearWrapSampler", .DescriptorTypeValue = DescriptorType::SAMPLER, .Flags = ShaderStageFlags::FRAGMENT}); ShaderReservedDescriptorSetMap.init(Arena, ShaderReservedLayoutBindingSpecificationMap.size()); ShaderReservedDescriptorSetLayoutMap.init(Arena, ShaderReservedLayoutBindingSpecificationMap.size()); VkDescriptorPoolSize pool_sizes[] = { - {.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = (MaxGlobalTexture * SwapchainImageCount)} + {.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .descriptorCount = (MaxGlobalTexture * SwapchainPtr->BufferredFrameCount)}, + { .type = VK_DESCRIPTOR_TYPE_SAMPLER, .descriptorCount = (1 * SwapchainPtr->BufferredFrameCount)} }; for (const auto& layout_binding_set : ShaderReservedLayoutBindingSpecificationMap) @@ -514,14 +530,14 @@ namespace ZEngine::Hardwares VkDescriptorPoolCreateInfo pool_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; pool_info.flags = PhysicalDeviceSupportSampledImageBindless ? 
VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT : 0; - pool_info.maxSets = SwapchainImageCount; + pool_info.maxSets = SwapchainPtr->BufferredFrameCount; pool_info.poolSizeCount = sizeof(pool_sizes) / sizeof(VkDescriptorPoolSize); pool_info.pPoolSizes = pool_sizes; ZENGINE_VALIDATE_ASSERT(vkCreateDescriptorPool(LogicalDevice, &pool_info, nullptr, &GlobalDescriptorPoolHandle) == VK_SUCCESS, "Failed to create Global DescriptorPool") for (const auto& layout : ShaderReservedDescriptorSetLayoutMap) { - ShaderReservedDescriptorSetMap[layout.first].init(Arena, SwapchainImageCount, SwapchainImageCount); + ShaderReservedDescriptorSetMap[layout.first].init(Arena, SwapchainPtr->BufferredFrameCount, SwapchainPtr->BufferredFrameCount); } scratch = ZGetScratch(Arena); @@ -529,8 +545,8 @@ namespace ZEngine::Hardwares for (const auto& layout : ShaderReservedDescriptorSetLayoutMap) { Array layout_set = {}; - layout_set.init(scratch.Arena, SwapchainImageCount, SwapchainImageCount); - for (uint32_t i = 0; i < SwapchainImageCount; ++i) + layout_set.init(scratch.Arena, SwapchainPtr->BufferredFrameCount, SwapchainPtr->BufferredFrameCount); + for (uint32_t i = 0; i < SwapchainPtr->BufferredFrameCount; ++i) { layout_set[i] = layout.second; } @@ -538,7 +554,7 @@ namespace ZEngine::Hardwares VkDescriptorSetAllocateInfo descriptor_set_allocate_info = {}; descriptor_set_allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; descriptor_set_allocate_info.descriptorPool = GlobalDescriptorPoolHandle; - descriptor_set_allocate_info.descriptorSetCount = SwapchainImageCount; + descriptor_set_allocate_info.descriptorSetCount = SwapchainPtr->BufferredFrameCount; descriptor_set_allocate_info.pSetLayouts = layout_set.data(); ZENGINE_VALIDATE_ASSERT(vkAllocateDescriptorSets(LogicalDevice, &descriptor_set_allocate_info, ShaderReservedDescriptorSetMap[layout.first].data()) == VK_SUCCESS, "Failed to create DescriptorSet") } @@ -553,11 +569,8 @@ namespace ZEngine::Hardwares void 
VulkanDevice::Deinitialize() { QueueWaitAll(); - { - std::unique_lock l(DirtyMutex); - RunningDirtyCollector = false; - } - DirtyCollectorCond.notify_one(); + + RunningDirtyCollector.store(false, std::memory_order_release); AsyncResLoader->Shutdown(); @@ -570,17 +583,8 @@ namespace ZEngine::Hardwares UniformBufferSetManager.Dispose(); ShaderManager.Dispose(); - SwapchainSignalFences.clear(); - SwapchainAcquiredSemaphores.clear(); - SwapchainRenderCompleteSemaphores.clear(); - - DisposeSwapchain(); - SwapchainAttachment->Dispose(); - - SwapchainFramebuffers.clear(); - SwapchainImageViews.clear(); - - m_buffer_manager.Deinitialize(); + SwapchainPtr->Dispose(); + CommandBufferMgr->Deinitialize(); for (auto set_layout : ShaderReservedDescriptorSetLayoutMap) { @@ -603,8 +607,6 @@ namespace ZEngine::Hardwares ZENGINE_DESTROY_VULKAN_HANDLE(Instance, vkDestroySurfaceKHR, Surface, nullptr) } - void VulkanDevice::Update() {} - void VulkanDevice::Dispose() { vmaDestroyAllocator(VmaAllocatorValue); @@ -615,11 +617,53 @@ namespace ZEngine::Hardwares __destroyDebugMessengerPtr = nullptr; __createDebugMessengerPtr = nullptr; } + vkDestroySampler(LogicalDevice, GlobalLinearWrapSampler, nullptr); vkDestroyDevice(LogicalDevice, nullptr); vkDestroyInstance(Instance, nullptr); - LogicalDevice = VK_NULL_HANDLE; - Instance = VK_NULL_HANDLE; + GlobalLinearWrapSampler = VK_NULL_HANDLE; + LogicalDevice = VK_NULL_HANDLE; + Instance = VK_NULL_HANDLE; + } + + void VulkanDevice::QueueSubmit(CommandBuffer* const command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore, uint32_t wait_flag, uint64_t signal_value, uint64_t wait_value, Rendering::Primitives::Semaphore* const wait_semaphore) + { + ZENGINE_VALIDATE_ASSERT(command_buffer->GetState() == CommandBufferState::Executable, "Command buffer must be in executable state to be submitted.") + ZENGINE_VALIDATE_ASSERT(signal_semaphore->IsTimeline == true, "Signal semaphore must be a timeline semaphore.") + + bool has_wait = 
(wait_semaphore != nullptr && wait_value != UINT64_MAX); + + VkPipelineStageFlags flag = VkPipelineStageFlagBits(wait_flag); + + VkCommandBuffer command_buffers[] = {command_buffer->GetHandle()}; + VkSemaphore semaphores[] = {signal_semaphore->GetHandle()}; + VkSemaphore wait_sems[] = {has_wait ? wait_semaphore->GetHandle() : VK_NULL_HANDLE}; + uint64_t wait_values[] = {wait_value}; + uint64_t signal_values[] = {signal_value}; + + VkTimelineSemaphoreSubmitInfo timeline_semaphore_submit_info = { + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreValueCount = has_wait ? 1u : 0u, + .pWaitSemaphoreValues = has_wait ? wait_values : nullptr, + .signalSemaphoreValueCount = 1, + .pSignalSemaphoreValues = signal_values, + }; + + VkSubmitInfo submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_semaphore_submit_info, + .waitSemaphoreCount = has_wait ? 1u : 0u, + .pWaitSemaphores = has_wait ? wait_sems : nullptr, + .pWaitDstStageMask = has_wait ? 
&flag : nullptr, + .commandBufferCount = 1, + .pCommandBuffers = command_buffers, + .signalSemaphoreCount = 1, + .pSignalSemaphores = semaphores, + }; + + ZENGINE_VALIDATE_ASSERT(vkQueueSubmit(GetQueue(command_buffer->QueueType).Handle, 1, &submit_info, VK_NULL_HANDLE) == VK_SUCCESS, "Failed to submit queue") + command_buffer->SetState(CommandBufferState::Pending); } bool VulkanDevice::QueueSubmit(const VkPipelineStageFlags wait_stage_flag, CommandBuffer* command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore, Rendering::Primitives::Fence* const fence) @@ -647,7 +691,7 @@ namespace ZEngine::Hardwares }; ZENGINE_VALIDATE_ASSERT(vkQueueSubmit(GetQueue(command_buffer->QueueType).Handle, 1, &submit_info, fence->GetHandle()) == VK_SUCCESS, "Failed to submit queue") - command_buffer->SetState(CommanBufferState::Pending); + command_buffer->SetState(CommandBufferState::Pending); fence->SetState(FenceState::Submitted); signal_semaphore->SetState(SemaphoreState::Submitted); @@ -660,7 +704,7 @@ namespace ZEngine::Hardwares fence->Reset(); signal_semaphore->SetState(Rendering::Primitives::SemaphoreState::Idle); - command_buffer->SetState(CommanBufferState::Invalid); + command_buffer->SetState(CommandBufferState::Invalid); return true; } @@ -669,7 +713,7 @@ namespace ZEngine::Hardwares { if (handle) { - DirtyResources.Add({.FrameIndex = CurrentFrameIndex, .Handle = handle, .Type = resource_type}); + DirtyResources.Add({.FrameIndex = SwapchainPtr->CurrentFrame->Index, .Handle = handle, .Type = resource_type}); } } @@ -733,14 +777,14 @@ namespace ZEngine::Hardwares ZENGINE_CORE_WARN("{}", pCallbackData->pMessage) } - if ((messageSeverity & static_cast(VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT)) == VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) + if ((messageSeverity & static_cast(VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT)) == VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { - ZENGINE_CORE_WARN("{}", pCallbackData->pMessage) + 
ZENGINE_CORE_TRACE("{}", pCallbackData->pMessage) } - if ((messageSeverity & static_cast(VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT)) == VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT) + if ((messageSeverity & static_cast(VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT)) == VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { - ZENGINE_CORE_WARN("{}", pCallbackData->pMessage) + ZENGINE_CORE_INFO("{}", pCallbackData->pMessage) } return VK_FALSE; @@ -762,7 +806,7 @@ namespace ZEngine::Hardwares switch (res_handle.Type) { case Rendering::DeviceResourceType::SAMPLER: - vkDestroySampler(LogicalDevice, reinterpret_cast(res_handle.Handle), nullptr); + // vkDestroySampler(LogicalDevice, reinterpret_cast(res_handle.Handle), nullptr); break; case Rendering::DeviceResourceType::FRAMEBUFFER: vkDestroyFramebuffer(LogicalDevice, reinterpret_cast(res_handle.Handle), nullptr); @@ -846,7 +890,7 @@ namespace ZEngine::Hardwares BufferImage& buffer = DirtyBufferImages[handle]; vkDestroyImageView(LogicalDevice, buffer.ViewHandle, nullptr); - vkDestroySampler(LogicalDevice, buffer.Sampler, nullptr); + // vkDestroySampler(LogicalDevice, buffer.Sampler, nullptr); vmaDestroyImage(VmaAllocatorValue, buffer.Handle, buffer.Allocation); DirtyBufferImages.Remove(handle); @@ -887,7 +931,7 @@ namespace ZEngine::Hardwares Helpers::secure_memset(allocation_info.pMappedData, 0, byte_size, byte_size); // Metadata info - buffer_view.FrameIndex = CurrentFrameIndex; + buffer_view.FrameIndex = SwapchainPtr->CurrentFrame == nullptr ? 
0u : SwapchainPtr->CurrentFrame->Index; if (buffer_usage & VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) { @@ -913,18 +957,16 @@ namespace ZEngine::Hardwares return buffer_view; } - void VulkanDevice::CopyBuffer(const BufferView& source, const BufferView& destination, VkDeviceSize byte_size, VkDeviceSize src_buffer_offset, VkDeviceSize dst_buffer_offset) + VkPipelineStageFlags VulkanDevice::CopyBuffer(CommandBuffer* command_buffer, const BufferView& source, const BufferView& destination, VkDeviceSize byte_size, VkDeviceSize src_buffer_offset, VkDeviceSize dst_buffer_offset) { - auto command_buffer = GetInstantCommandBuffer(Rendering::QueueType::GRAPHIC_QUEUE); - - VkBufferMemoryBarrier bufMemBarrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER}; - bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - bufMemBarrier.buffer = source.Handle; - bufMemBarrier.offset = 0; - bufMemBarrier.size = VK_WHOLE_SIZE; + VkBufferMemoryBarrier bufMemBarrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER}; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = source.Handle; + bufMemBarrier.offset = 0; + bufMemBarrier.size = VK_WHOLE_SIZE; vkCmdPipelineBarrier(command_buffer->GetHandle(), VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); @@ -978,7 +1020,7 @@ namespace ZEngine::Hardwares vkCmdPipelineBarrier(command_buffer->GetHandle(), VK_PIPELINE_STAGE_TRANSFER_BIT, dst_pipeline_stage, 0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr); - EnqueueInstantCommandBuffer(command_buffer, dst_pipeline_stage); + return dst_pipeline_stage; } BufferImage 
VulkanDevice::CreateImage(uint32_t width, uint32_t height, VkImageType image_type, VkImageViewType image_view_type, VkFormat image_format, VkImageTiling image_tiling, VkImageLayout image_initial_layout, VkImageUsageFlags image_usage, VkSharingMode image_sharing_mode, VkSampleCountFlagBits image_sample_count, VkMemoryPropertyFlags requested_properties, VkImageAspectFlagBits image_aspect_flag, uint32_t layer_count, VkImageCreateFlags image_create_flag_bit) @@ -1008,41 +1050,14 @@ namespace ZEngine::Hardwares ZENGINE_VALIDATE_ASSERT(vmaCreateImage(VmaAllocatorValue, &image_create_info, &allocation_create_info, &(buffer_image.Handle), &(buffer_image.Allocation), nullptr) == VK_SUCCESS, "Failed to create buffer"); buffer_image.ViewHandle = CreateImageView(buffer_image.Handle, image_format, image_view_type, image_aspect_flag, layer_count); - buffer_image.Sampler = CreateImageSampler(); + // buffer_image.Sampler = GlobalLinearWrapSampler; // Metadata info - buffer_image.FrameIndex = CurrentFrameIndex; + buffer_image.FrameIndex = SwapchainPtr->CurrentFrame == nullptr ? 0u : SwapchainPtr->CurrentFrame->Index; return buffer_image; } - VkSampler VulkanDevice::CreateImageSampler() - { - VkSampler sampler{VK_NULL_HANDLE}; - - VkSamplerCreateInfo sampler_create_info = {}; - sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - sampler_create_info.minFilter = VK_FILTER_LINEAR; - sampler_create_info.magFilter = VK_FILTER_NEAREST; - sampler_create_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; - sampler_create_info.anisotropyEnable = PhysicalDeviceFeature.features.samplerAnisotropy; - sampler_create_info.maxAnisotropy = PhysicalDeviceFeature.features.samplerAnisotropy ? 
PhysicalDeviceProperties.limits.maxSamplerAnisotropy : 1.0f; - sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; - sampler_create_info.unnormalizedCoordinates = VK_FALSE; - sampler_create_info.compareEnable = VK_FALSE; - sampler_create_info.compareOp = VK_COMPARE_OP_ALWAYS; - sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - sampler_create_info.mipLodBias = 0.0f; - sampler_create_info.minLod = -1000.0f; - sampler_create_info.maxLod = 1000.0f; - - ZENGINE_VALIDATE_ASSERT(vkCreateSampler(LogicalDevice, &sampler_create_info, nullptr, &sampler) == VK_SUCCESS, "Failed to create Texture Sampler") - - return sampler; - } - VkFormat VulkanDevice::FindSupportedFormat(Core::Containers::ArrayView format_collection, VkImageTiling image_tiling, VkFormatFeatureFlags feature_flags) { VkFormat supported_format = VK_FORMAT_UNDEFINED; @@ -1123,9 +1138,9 @@ namespace ZEngine::Hardwares { auto handle = VertexBufferSetManager.Create(); auto buffer_set = VertexBufferSetManager.Access(handle); - buffer_set->set.init(Arena, SwapchainImageCount, SwapchainImageCount); + buffer_set->set.init(Arena, SwapchainPtr->BufferredFrameCount, SwapchainPtr->BufferredFrameCount); - for (unsigned i = 0; i < SwapchainImageCount; ++i) + for (unsigned i = 0; i < SwapchainPtr->BufferredFrameCount; ++i) { buffer_set->set[i] = ZPushStructCtorArgs(Arena, VertexBuffer, this); } @@ -1137,9 +1152,9 @@ namespace ZEngine::Hardwares { auto handle = StorageBufferSetManager.Create(); auto buffer = StorageBufferSetManager.Access(handle); - buffer->set.init(Arena, SwapchainImageCount, SwapchainImageCount); + buffer->set.init(Arena, SwapchainPtr->BufferredFrameCount, SwapchainPtr->BufferredFrameCount); - for (unsigned i = 0; i < SwapchainImageCount; ++i) + for (unsigned i = 0; i < SwapchainPtr->BufferredFrameCount; ++i) { buffer->set[i] = ZPushStructCtorArgs(Arena, StorageBuffer, this); } @@ -1150,9 +1165,9 @@ namespace ZEngine::Hardwares { auto handle = 
IndirectBufferSetManager.Create(); auto buffer = IndirectBufferSetManager.Access(handle); - buffer->set.init(Arena, SwapchainImageCount, SwapchainImageCount); + buffer->set.init(Arena, SwapchainPtr->BufferredFrameCount, SwapchainPtr->BufferredFrameCount); - for (unsigned i = 0; i < SwapchainImageCount; ++i) + for (unsigned i = 0; i < SwapchainPtr->BufferredFrameCount; ++i) { buffer->set[i] = ZPushStructCtorArgs(Arena, IndirectBuffer, this); } @@ -1164,9 +1179,9 @@ namespace ZEngine::Hardwares { auto handle = IndexBufferSetManager.Create(); auto buffer = IndexBufferSetManager.Access(handle); - buffer->set.init(Arena, SwapchainImageCount, SwapchainImageCount); + buffer->set.init(Arena, SwapchainPtr->BufferredFrameCount, SwapchainPtr->BufferredFrameCount); - for (unsigned i = 0; i < SwapchainImageCount; ++i) + for (unsigned i = 0; i < SwapchainPtr->BufferredFrameCount; ++i) { buffer->set[i] = ZPushStructCtorArgs(Arena, IndexBuffer, this); } @@ -1178,9 +1193,9 @@ namespace ZEngine::Hardwares { auto handle = UniformBufferSetManager.Create(); auto buffer = UniformBufferSetManager.Access(handle); - buffer->set.init(Arena, SwapchainImageCount, SwapchainImageCount); + buffer->set.init(Arena, SwapchainPtr->BufferredFrameCount, SwapchainPtr->BufferredFrameCount); - for (unsigned i = 0; i < SwapchainImageCount; ++i) + for (unsigned i = 0; i < SwapchainPtr->BufferredFrameCount; ++i) { buffer->set[i] = ZPushStructCtorArgs(Arena, UniformBuffer, this); } @@ -1188,341 +1203,21 @@ namespace ZEngine::Hardwares return handle; } - void VulkanDevice::CreateSwapchain() - { - VkSurfaceCapabilitiesKHR capabilities{}; - vkGetPhysicalDeviceSurfaceCapabilitiesKHR(PhysicalDevice, Surface, &capabilities); - if (capabilities.currentExtent.width != std::numeric_limits::max()) - { - SwapchainImageWidth = capabilities.currentExtent.width; - SwapchainImageHeight = capabilities.currentExtent.height; - } - - auto min_image_count = std::clamp(capabilities.minImageCount, capabilities.minImageCount, 
capabilities.maxImageCount == 0 ? capabilities.minImageCount + 1 : capabilities.maxImageCount); - VkSwapchainCreateInfoKHR swapchain_create_info = { - .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .pNext = nullptr, .surface = Surface, .minImageCount = min_image_count, .imageFormat = SurfaceFormat.format, .imageColorSpace = SurfaceFormat.colorSpace, .imageExtent = VkExtent2D{.width = SwapchainImageWidth, .height = SwapchainImageHeight}, - .imageArrayLayers = 1, .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, .preTransform = capabilities.currentTransform, .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, .presentMode = PresentMode, .clipped = VK_TRUE - }; - - auto scratch = ZGetScratch(Arena); - - Array family_indice = {}; - uint32_t family_indice_count = HasSeperateTransfertQueueFamily ? 2 : 1; - family_indice.init(scratch.Arena, family_indice_count, family_indice_count); - family_indice[0] = GraphicFamilyIndex; - if (HasSeperateTransfertQueueFamily) - { - family_indice[1] = TransferFamilyIndex; - } - swapchain_create_info.imageSharingMode = HasSeperateTransfertQueueFamily ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE; - swapchain_create_info.queueFamilyIndexCount = HasSeperateTransfertQueueFamily ? 
2 : 1; - swapchain_create_info.pQueueFamilyIndices = family_indice.data(); - - ZENGINE_VALIDATE_ASSERT(vkCreateSwapchainKHR(LogicalDevice, &swapchain_create_info, nullptr, &SwapchainHandle) == VK_SUCCESS, "Failed to create Swapchain") - - ZReleaseScratch(scratch); - - uint32_t image_count = 0; - ZENGINE_VALIDATE_ASSERT(vkGetSwapchainImagesKHR(LogicalDevice, SwapchainHandle, &image_count, nullptr) == VK_SUCCESS, "Failed to get Images count from Swapchain") - - bool swapchainImageCountChanged = false; - if (image_count != SwapchainImageCount) - { - ZENGINE_CORE_WARN("Max Swapchain image count supported is {}, but requested {}", image_count, SwapchainImageCount) - auto old_swapchain_image_count = SwapchainImageCount; - SwapchainImageCount = image_count; - ZENGINE_CORE_WARN("Swapchain image count has changed from {} to {}", old_swapchain_image_count, image_count) - - swapchainImageCountChanged = true; - } - - if ((SwapchainImageCountChangeCount > 0) && (PreviousSwapchainImageCount != SwapchainImageCount)) - { - ZENGINE_CORE_WARN("Swapchain image count has changed from previous creation") - - auto delta = SwapchainImageCount - PreviousSwapchainImageCount; - - // When delta is less or equal of zero, it means we have enough memory to handle ops - if (delta > 0 && delta < std::numeric_limits::max()) - { - SwapchainImageViews.push({}); - SwapchainFramebuffers.push({}); - - m_buffer_manager.IncreaseBuffers(); - // EnqueuedCommandbuffers.reserve(m_buffer_manager.TotalCommandBufferCount); - } - } - else - { - if (SwapchainImageViews.capacity() <= 0) - { - SwapchainImageViews.init(Arena, SwapchainImageCount, SwapchainImageCount); - } - - if (SwapchainFramebuffers.capacity() <= 0) - { - SwapchainFramebuffers.init(Arena, SwapchainImageCount, SwapchainImageCount); - } - - if (!m_buffer_manager.IsInitialized()) - { - m_buffer_manager.Initialize(this); - } - } - - scratch = ZGetScratch(Arena); - - Array SwapchainImages = {}; - SwapchainImages.init(scratch.Arena, SwapchainImageCount, 
SwapchainImageCount); - ZENGINE_VALIDATE_ASSERT(vkGetSwapchainImagesKHR(LogicalDevice, SwapchainHandle, &SwapchainImageCount, SwapchainImages.data()) == VK_SUCCESS, "Failed to get VkImages from Swapchain") - - /*Transition Image from Undefined to Present_src*/ - auto command_buffer = GetInstantCommandBuffer(Rendering::QueueType::GRAPHIC_QUEUE); - { - for (int i = 0; i < SwapchainImages.size(); ++i) - { - Rendering::Specifications::ImageMemoryBarrierSpecification barrier_spec = {}; - barrier_spec.ImageHandle = SwapchainImages[i]; - barrier_spec.OldLayout = Specifications::ImageLayout::UNDEFINED; - barrier_spec.NewLayout = Specifications::ImageLayout::PRESENT_SRC; - barrier_spec.ImageAspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier_spec.SourceAccessMask = 0; - barrier_spec.DestinationAccessMask = VK_ACCESS_MEMORY_READ_BIT; - barrier_spec.SourceStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - barrier_spec.DestinationStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - barrier_spec.LayerCount = 1; - - Rendering::Primitives::ImageMemoryBarrier barrier{barrier_spec}; - command_buffer->TransitionImageLayout(barrier); - } - } - EnqueueInstantCommandBuffer(command_buffer); - - for (int i = 0; i < SwapchainImageCount; ++i) - { - SwapchainImageViews[i] = CreateImageView(SwapchainImages[i], SurfaceFormat.format, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_ASPECT_COLOR_BIT); - - Array fb_images_views; - fb_images_views.init(scratch.Arena, 1); - fb_images_views.push(SwapchainImageViews[i]); - SwapchainFramebuffers[i] = CreateFramebuffer(ArrayView{fb_images_views}, SwapchainAttachment->GetHandle(), SwapchainImageWidth, SwapchainImageHeight); - } - - ZReleaseScratch(scratch); - - if (swapchainImageCountChanged) - { - SwapchainImageCountChangeCount++; - } - } - - void VulkanDevice::ResizeSwapchain() - { - DisposeSwapchain(); - - ZENGINE_DESTROY_VULKAN_HANDLE(Instance, vkDestroySurfaceKHR, Surface, nullptr) - ZENGINE_VALIDATE_ASSERT(CurrentWindow->CreateSurface(Instance, 
reinterpret_cast(&Surface)), "Failed Window Surface from GLFW") - - CreateSwapchain(); - } - - void VulkanDevice::DisposeSwapchain() - { - PreviousSwapchainImageCount = SwapchainImageCount; - - for (VkImageView image_view : SwapchainImageViews) - { - if (image_view) - { - EnqueueForDeletion(DeviceResourceType::IMAGEVIEW, image_view); - } - } - - for (VkFramebuffer framebuffer : SwapchainFramebuffers) - { - if (framebuffer) - { - EnqueueForDeletion(DeviceResourceType::FRAMEBUFFER, framebuffer); - } - } - - // We don't call .clear() because we want to reuse the allocated space - // SwapchainImageViews.clear(); - // SwapchainFramebuffers.clear(); - - ZENGINE_DESTROY_VULKAN_HANDLE(LogicalDevice, vkDestroySwapchainKHR, SwapchainHandle, nullptr) - } - - void VulkanDevice::NewFrame() - { - Primitives::Fence* signal_fence = SwapchainSignalFences[CurrentFrameIndex]; - if (!signal_fence->IsSignaled()) - { - if (!signal_fence->Wait(UINT64_MAX)) - { - return; - } - } - - signal_fence->Reset(); - Primitives::Semaphore* acquired_semaphore = SwapchainAcquiredSemaphores[CurrentFrameIndex]; - ZENGINE_VALIDATE_ASSERT(acquired_semaphore->GetState() != Primitives::SemaphoreState::Submitted, "") - - VkResult acquire_image_result = vkAcquireNextImageKHR(LogicalDevice, SwapchainHandle, UINT64_MAX, acquired_semaphore->GetHandle(), VK_NULL_HANDLE, &SwapchainImageIndex); - acquired_semaphore->SetState(Primitives::SemaphoreState::Submitted); - - if (acquire_image_result == VK_ERROR_OUT_OF_DATE_KHR) - { - ResizeSwapchain(); - } - - m_buffer_manager.ResetPool(CurrentFrameIndex); - } - - void VulkanDevice::Present() - { - Textures::TextureHandle tex_handle = {}; - if (TextureHandleToUpdates.Pop(tex_handle)) - { - auto texture = GlobalTextures.Access(tex_handle); - - if (!texture) - { - TextureHandleToUpdates.Enqueue(tex_handle); - return; - } - - else - { - auto img_buf = Image2DBufferManager.Access(texture->BufferHandle); - const auto& image_info = img_buf->GetDescriptorImageInfo(); - - auto 
scratch = ZGetScratch(Arena); - { - Array write_descriptor_sets = {}; - write_descriptor_sets.init(scratch.Arena, WriteBindlessDescriptorSetRequests.size()); - - for (auto& req : WriteBindlessDescriptorSetRequests) - { - write_descriptor_sets.push(VkWriteDescriptorSet{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .pNext = nullptr, .dstSet = req.DstSet, .dstBinding = req.Binding, .dstArrayElement = (uint32_t) tex_handle.Index, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .pImageInfo = &(image_info), .pBufferInfo = nullptr, .pTexelBufferView = nullptr}); - } - - vkUpdateDescriptorSets(LogicalDevice, write_descriptor_sets.size(), write_descriptor_sets.data(), 0, nullptr); - } - ZReleaseScratch(scratch); - } - } - - Textures::TextureHandle tex_to_dispose = {}; - if (TextureHandleToDispose.Pop(tex_to_dispose)) - { - auto texture = GlobalTextures.Access(tex_to_dispose); - if (texture) - { - texture->Dispose(); - GlobalTextures.Remove(tex_to_dispose); - } - } - - Primitives::Semaphore* acquired_semaphore = SwapchainAcquiredSemaphores[CurrentFrameIndex]; - Primitives::Semaphore* render_complete_semaphore = SwapchainRenderCompleteSemaphores[CurrentFrameIndex]; - Primitives::Fence* signal_fence = SwapchainSignalFences[CurrentFrameIndex]; - - m_buffer_manager.EndEnqueuedBuffers(); - auto scratch = ZGetScratch(Arena); - - Array buffer = {}; - buffer.init(scratch.Arena, m_buffer_manager.EnqueuedCommandbufferIndex, m_buffer_manager.EnqueuedCommandbufferIndex); - for (int i = 0; i < buffer.size(); ++i) - { - buffer[i] = m_buffer_manager.EnqueuedCommandbuffers[i]->GetHandle(); - } - - ZENGINE_VALIDATE_ASSERT(render_complete_semaphore->GetState() != Rendering::Primitives::SemaphoreState::Submitted, "Signal semaphore is already in a signaled state.") - ZENGINE_VALIDATE_ASSERT(signal_fence->GetState() != Rendering::Primitives::FenceState::Submitted, "Signal fence is already in a signaled state.") - - VkQueue queue = 
m_queue_map.at(Rendering::QueueType::GRAPHIC_QUEUE); - VkSemaphore wait_semaphores[] = {acquired_semaphore->GetHandle()}; - VkSemaphore signal_semaphores[] = {render_complete_semaphore->GetHandle()}; - VkPipelineStageFlags stage_flags[] = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT}; - VkSubmitInfo submit_info = {.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = nullptr, .waitSemaphoreCount = 1, .pWaitSemaphores = wait_semaphores, .pWaitDstStageMask = stage_flags, .commandBufferCount = (uint32_t) buffer.size(), .pCommandBuffers = buffer.data(), .signalSemaphoreCount = 1, .pSignalSemaphores = signal_semaphores}; - - auto submit = vkQueueSubmit(queue, 1, &(submit_info), signal_fence->GetHandle()); - ZENGINE_VALIDATE_ASSERT(submit == VK_SUCCESS, "Failed to submit queue") - - ZReleaseScratch(scratch); - - m_buffer_manager.ResetEnqueuedBufferIndex(); - - signal_fence->SetState(FenceState::Submitted); - render_complete_semaphore->SetState(SemaphoreState::Submitted); - - VkSwapchainKHR swapchains[] = {SwapchainHandle}; - uint32_t frames[] = {SwapchainImageIndex}; - VkPresentInfoKHR present_info = {.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, .waitSemaphoreCount = 1, .pWaitSemaphores = signal_semaphores, .swapchainCount = 1, .pSwapchains = swapchains, .pImageIndices = frames}; - VkResult present_result = vkQueuePresentKHR(queue, &present_info); - - acquired_semaphore->SetState(SemaphoreState::Idle); - render_complete_semaphore->SetState(SemaphoreState::Idle); - - if (present_result == VK_ERROR_OUT_OF_DATE_KHR || present_result == VK_SUBOPTIMAL_KHR) - { - ResizeSwapchain(); - IncrementFrameImageCount(); - return; - } - - ZENGINE_VALIDATE_ASSERT(present_result == VK_SUCCESS, "Failed to present current frame on Window") - - IncrementFrameImageCount(); - - { - std::lock_guard l(DirtyMutex); - IdleFrameCount++; - } - DirtyCollectorCond.notify_one(); - } - - void VulkanDevice::IncrementFrameImageCount() - { - PreviousFrameIndex = CurrentFrameIndex; - 
CurrentFrameIndex = (CurrentFrameIndex + 1) % SwapchainImageCount; - } - - CommandBuffer* VulkanDevice::GetCommandBuffer(bool begin) - { - return m_buffer_manager.GetCommandBuffer(CurrentFrameIndex, begin); - } - - CommandBuffer* VulkanDevice::GetInstantCommandBuffer(Rendering::QueueType type, bool begin) - { - return m_buffer_manager.GetInstantCommandBuffer(type, CurrentFrameIndex, begin); - } - - void VulkanDevice::EnqueueInstantCommandBuffer(CommandBuffer* const buffer, int wait_flag) - { - m_buffer_manager.EndInstantCommandBuffer(buffer, this, wait_flag); - } - - void VulkanDevice::EnqueueCommandBuffer(CommandBuffer* const buffer) - { - m_buffer_manager.EnqueueBuffer(buffer); - } - void VulkanDevice::DirtyCollector() { - using namespace std::chrono_literals; + RunningDirtyCollector.store(true, std::memory_order_release); ZENGINE_CORE_INFO("[*] Dirty Resource Collector started...") - while (RunningDirtyCollector) + while (RunningDirtyCollector.load(std::memory_order_acquire)) { - std::unique_lock lock(DirtyMutex); - DirtyCollectorCond.wait(lock, [this] { return (IdleFrameCount > IdleFrameThreshold) || RunningDirtyCollector.load() == false; }); + uint32_t idle_count = SwapchainPtr->IdleFrameCount.load(std::memory_order_acquire); + uint32_t threshold = SwapchainPtr->IdleFrameThreshold.load(std::memory_order_acquire); - if (RunningDirtyCollector == false) + if (idle_count < threshold) { - break; + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + continue; } if (DirtyResources.CanRemove()) @@ -1538,12 +1233,12 @@ namespace ZEngine::Hardwares } DirtyResource& res_handle = DirtyResources[handle]; - if (res_handle.FrameIndex == CurrentFrameIndex) + if (res_handle.FrameIndex == SwapchainPtr->CurrentFrame->Index) { switch (res_handle.Type) { case Rendering::DeviceResourceType::SAMPLER: - vkDestroySampler(LogicalDevice, reinterpret_cast(res_handle.Handle), nullptr); + // vkDestroySampler(LogicalDevice, reinterpret_cast(res_handle.Handle), nullptr); break; 
case Rendering::DeviceResourceType::FRAMEBUFFER: vkDestroyFramebuffer(LogicalDevice, reinterpret_cast(res_handle.Handle), nullptr); @@ -1609,7 +1304,7 @@ namespace ZEngine::Hardwares } BufferView& buffer = DirtyBuffers[handle]; - if (buffer && buffer.FrameIndex == CurrentFrameIndex) + if (buffer && buffer.FrameIndex == SwapchainPtr->CurrentFrame->Index) { vmaDestroyBuffer(VmaAllocatorValue, buffer.Handle, buffer.Allocation); buffer.Handle = VK_NULL_HANDLE; @@ -1633,10 +1328,10 @@ namespace ZEngine::Hardwares BufferImage& buffer = DirtyBufferImages[handle]; - if (buffer && buffer.FrameIndex == CurrentFrameIndex) + if (buffer && buffer.FrameIndex == SwapchainPtr->CurrentFrame->Index) { vkDestroyImageView(LogicalDevice, buffer.ViewHandle, nullptr); - vkDestroySampler(LogicalDevice, buffer.Sampler, nullptr); + // vkDestroySampler(LogicalDevice, buffer.Sampler, nullptr); vmaDestroyImage(VmaAllocatorValue, buffer.Handle, buffer.Allocation); buffer.Handle = VK_NULL_HANDLE; buffer.Allocation = VK_NULL_HANDLE; @@ -1644,8 +1339,7 @@ namespace ZEngine::Hardwares } } } - - IdleFrameCount = 0; + SwapchainPtr->IdleFrameCount.store(0, std::memory_order_release); } ZENGINE_CORE_INFO("[*] Dirty Resource Collector stopped...") @@ -1688,9 +1382,10 @@ namespace ZEngine::Hardwares /* * CommandBufferManager impl */ - CommandBuffer::CommandBuffer(Hardwares::VulkanDevice* device, VkCommandPool command_pool, Rendering::QueueType type, bool one_time_usage) : Device(device), QueueType(type), m_command_pool(command_pool) + CommandBuffer::CommandBuffer(Hardwares::VulkanDevice* device, VkCommandPool command_pool, Rendering::QueueType type, bool primary) : Device(device), QueueType(type), m_command_pool(command_pool) { Device->Arena->CreateSubArena(ZKilo(120), &LocalArena); + BufferType = primary ? 
CommandBufferType::Primary : CommandBufferType::Secondary; Create(); } @@ -1705,12 +1400,12 @@ namespace ZEngine::Hardwares VkCommandBufferAllocateInfo command_buffer_allocation_info = {}; command_buffer_allocation_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - command_buffer_allocation_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + command_buffer_allocation_info.level = (BufferType == CommandBufferType::Primary) ? VK_COMMAND_BUFFER_LEVEL_PRIMARY : VK_COMMAND_BUFFER_LEVEL_SECONDARY; command_buffer_allocation_info.commandBufferCount = 1; command_buffer_allocation_info.commandPool = m_command_pool; ZENGINE_VALIDATE_ASSERT(vkAllocateCommandBuffers(Device->LogicalDevice, &command_buffer_allocation_info, &m_command_buffer) == VK_SUCCESS, "Failed to allocate command buffer!") - m_command_buffer_state = CommanBufferState::Idle; + m_command_buffer_state = CommandBufferState::Idle; } void CommandBuffer::Free() @@ -1730,22 +1425,44 @@ namespace ZEngine::Hardwares void CommandBuffer::Begin() { - ZENGINE_VALIDATE_ASSERT(m_command_buffer_state == CommanBufferState::Idle, "command buffer must be in Idle state") + ZENGINE_VALIDATE_ASSERT(m_command_buffer_state == CommandBufferState::Idle, "command buffer must be in Idle state") + ZENGINE_VALIDATE_ASSERT(BufferType == CommandBufferType::Primary, "command buffer must be Primary Buffer Type") VkCommandBufferBeginInfo command_buffer_begin_info = {}; command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; ZENGINE_VALIDATE_ASSERT(vkBeginCommandBuffer(m_command_buffer, &command_buffer_begin_info) == VK_SUCCESS, "Failed to begin the Command Buffer") - m_command_buffer_state = CommanBufferState::Recording; + m_command_buffer_state = CommandBufferState::Recording; + } + + void CommandBuffer::BeginSecondary(Rendering::Renderers::RenderPasses::RenderPass* const render_pass, VkFramebuffer framebuffer) + { + 
ZENGINE_VALIDATE_ASSERT(m_command_buffer_state == CommandBufferState::Idle, "command buffer must be in Idle state") + ZENGINE_VALIDATE_ASSERT(BufferType == CommandBufferType::Secondary, "command buffer must be Secondary Buffer Type") + + VkCommandBufferInheritanceInfo inheritance_info = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO}; + inheritance_info.renderPass = render_pass->GetAttachment()->GetHandle(); + inheritance_info.subpass = 0; + inheritance_info.framebuffer = framebuffer; + + VkCommandBufferBeginInfo command_buffer_begin_info = {}; + command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; + command_buffer_begin_info.pInheritanceInfo = &inheritance_info; + + ZENGINE_VALIDATE_ASSERT(vkBeginCommandBuffer(m_command_buffer, &command_buffer_begin_info) == VK_SUCCESS, "Failed to begin the Command Buffer") + + m_command_buffer_state = CommandBufferState::Recording; + m_active_render_pass = render_pass; } void CommandBuffer::End() { - ZENGINE_VALIDATE_ASSERT(m_command_buffer_state == CommanBufferState::Recording, "command buffer must be in Idle state") + ZENGINE_VALIDATE_ASSERT(m_command_buffer_state == CommandBufferState::Recording, "command buffer must be in Idle state") ZENGINE_VALIDATE_ASSERT(vkEndCommandBuffer(m_command_buffer) == VK_SUCCESS, "Failed to end recording command buffer!") - m_command_buffer_state = CommanBufferState::Executable; + m_command_buffer_state = CommandBufferState::Executable; } bool CommandBuffer::Completed() @@ -1755,27 +1472,27 @@ namespace ZEngine::Hardwares bool CommandBuffer::IsExecutable() { - return m_command_buffer_state == CommanBufferState::Executable; + return m_command_buffer_state == CommandBufferState::Executable; } bool CommandBuffer::IsRecording() { - return m_command_buffer_state == CommanBufferState::Recording; + return m_command_buffer_state == CommandBufferState::Recording; } - 
CommanBufferState CommandBuffer::GetState() const + CommandBufferState CommandBuffer::GetState() const { - return CommanBufferState{m_command_buffer_state.load()}; + return CommandBufferState{m_command_buffer_state.load()}; } void CommandBuffer::ResetState() { - m_command_buffer_state = CommanBufferState::Idle; + m_command_buffer_state = CommandBufferState::Idle; m_signal_fence = {}; m_signal_semaphore = {}; } - void CommandBuffer::SetState(const CommanBufferState& state) + void CommandBuffer::SetState(const CommandBufferState& state) { m_command_buffer_state = state; } @@ -1811,9 +1528,10 @@ namespace ZEngine::Hardwares m_clear_value[1].depthStencil.stencil = stencil; } - void CommandBuffer::BeginRenderPass(Rendering::Renderers::RenderPasses::RenderPass* const render_pass, VkFramebuffer framebuffer) + void CommandBuffer::BeginRenderPass(Rendering::Renderers::RenderPasses::RenderPass* const render_pass, VkFramebuffer framebuffer, bool is_content_secondary_command_buffer) { ZENGINE_VALIDATE_ASSERT(m_command_buffer != nullptr, "Command buffer can't be null") + ZENGINE_VALIDATE_ASSERT(BufferType == CommandBufferType::Primary, "command buffer must be Primary Buffer Type") const auto& render_pass_spec = render_pass->Specification; const uint32_t width = render_pass->GetRenderAreaWidth(); @@ -1863,24 +1581,7 @@ namespace ZEngine::Hardwares render_pass_begin_info.clearValueCount = clear_values.size(); render_pass_begin_info.pClearValues = clear_values.data(); - vkCmdBeginRenderPass(m_command_buffer, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); - - VkViewport viewport = {}; - viewport.x = 0.0f; - viewport.y = 0.0f; - viewport.width = width; - viewport.height = height; - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - vkCmdSetViewport(m_command_buffer, 0, 1, &viewport); - - /*Scissor definition*/ - VkRect2D scissor = {}; - scissor.offset = {0, 0}; - scissor.extent = {width, height}; - vkCmdSetScissor(m_command_buffer, 0, 1, &scissor); - - 
vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, render_pass->Pipeline->Handle); + vkCmdBeginRenderPass(m_command_buffer, &render_pass_begin_info, is_content_secondary_command_buffer ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE); m_active_render_pass = render_pass; @@ -1937,6 +1638,16 @@ namespace ZEngine::Hardwares } } + void CommandBuffer::BindPipeline(Rendering::Specifications::PipelineBindPoint bind_point, Rendering::Renderers::Pipelines::GraphicPipeline* const pipeline) + { + ZENGINE_VALIDATE_ASSERT(m_command_buffer != nullptr, "Command buffer can't be null") + ZENGINE_VALIDATE_ASSERT(pipeline != nullptr, "Pipeline can't be null") + ZENGINE_VALIDATE_ASSERT(pipeline->Handle != VK_NULL_HANDLE, "Pipeline Handle can't be null") + + // todo : adapt value based on bind_point + vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline->Handle); + } + void CommandBuffer::DrawIndirect(const Hardwares::IndirectBuffer& buffer) { ZENGINE_VALIDATE_ASSERT(m_command_buffer != nullptr, "Command buffer can't be null") @@ -2020,12 +1731,29 @@ namespace ZEngine::Hardwares } } - void CommandBuffer::SetScissor(const VkRect2D& scissor) + void CommandBuffer::SetScissor(uint32_t w, uint32_t h, int32_t x, int32_t y) { ZENGINE_VALIDATE_ASSERT(m_command_buffer != nullptr, "Command buffer can't be null") + VkRect2D scissor = {}; + scissor.offset = {x, y}; + scissor.extent = {w, h}; vkCmdSetScissor(m_command_buffer, 0, 1, &scissor); } + void CommandBuffer::SetViewport(uint32_t w, uint32_t h, float x, float y, float min_depth, float max_depth) + { + ZENGINE_VALIDATE_ASSERT(m_command_buffer != nullptr, "Command buffer can't be null") + + VkViewport viewport = {}; + viewport.x = x; + viewport.y = y; + viewport.width = w; + viewport.height = h; + viewport.minDepth = min_depth; + viewport.maxDepth = max_depth; + vkCmdSetViewport(m_command_buffer, 0, 1, &viewport); + } + void CommandBuffer::PushConstants(VkShaderStageFlags 
stage_flags, uint32_t offset, uint32_t size, const void* data) { ZENGINE_VALIDATE_ASSERT(m_command_buffer != nullptr, "Command buffer can't be null") @@ -2037,7 +1765,32 @@ namespace ZEngine::Hardwares } } - void CommandBufferManager::Initialize(VulkanDevice* device, int thread_count) + void CommandBuffer::ExecuteSecondaryCommandBuffers(Core::Containers::ArrayView buffers) + { + ZENGINE_VALIDATE_ASSERT(m_command_buffer != nullptr, "Command buffer can't be null") + ZENGINE_VALIDATE_ASSERT(BufferType == CommandBufferType::Primary, "command buffer must be Primary Buffer Type") + + if (buffers.size() == 0) + { + ZENGINE_CORE_WARN("No secondary buffers to execute") + return; + } + + Array handles = {}; + auto scratch = ZGetScratch(Device->Arena); + + handles.init(scratch.Arena, buffers.size(), buffers.size()); + for (size_t i = 0; i < buffers.size(); ++i) + { + handles[i] = buffers[i].GetHandle(); + } + + vkCmdExecuteCommands(m_command_buffer, handles.size(), handles.data()); + + ZReleaseScratch(scratch); + } + + void CommandBufferManager::Initialize(VulkanDevice* device, uint32_t image_count, uint8_t override_thread_count) { if (m_is_initialized) { @@ -2045,49 +1798,65 @@ namespace ZEngine::Hardwares return; } - Device = device; - m_thread_count = thread_count; - m_total_pool_count = Device->SwapchainImageCount * m_thread_count; - TotalCommandBufferCount = m_total_pool_count * MaxBufferPerPool; - m_instant_fence = ZPushStructCtorArgs(Device->Arena, Primitives::Fence, Device); - m_instant_semaphore = ZPushStructCtorArgs(Device->Arena, Primitives::Semaphore, Device); + Device = device; + TotalThreadCount = override_thread_count > 0 ? 
override_thread_count : device->WorkerThreadCount; + TotalPoolCount = image_count * TotalThreadCount; + TotalCommandBufferCount = TotalPoolCount * MaxBufferPerPool; + TotalInstantCommandBufferCount = MaxBufferPerPool * MaxBufferPerPool * TotalPoolCount; // We want to have enough instant command buffers for each pool, so we can guarantee that there will always be an instant command buffer available for each pool when needed - EnqueuedCommandbuffers.init(Device->Arena, TotalCommandBufferCount, TotalCommandBufferCount); + InstantGraphicsPools.init(Device->Arena, TotalPoolCount, TotalPoolCount); + InstantGraphicsCommandBuffers.init(Device->Arena, TotalInstantCommandBufferCount, TotalInstantCommandBufferCount); + CommandPools.init(Device->Arena, TotalPoolCount, TotalPoolCount); + CommandBuffers.init(Device->Arena, TotalCommandBufferCount, TotalCommandBufferCount); + EnqueuedCommandBuffers.init(Device->Arena, TotalCommandBufferCount, TotalCommandBufferCount); - CommandPools.init(Device->Arena, m_total_pool_count, m_total_pool_count); - for (int i = 0; i < m_total_pool_count; ++i) + for (uint32_t i = 0; i < TotalPoolCount; ++i) { - CommandPools[i] = ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, Rendering::QueueType::GRAPHIC_QUEUE); + InstantGraphicsPools[i] = ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, QueueType::GRAPHIC_QUEUE); + + for (uint32_t buf_idx = 0; buf_idx < (MaxBufferPerPool * MaxBufferPerPool); ++buf_idx) + { + uint32_t buffer_idx = (i * (MaxBufferPerPool * MaxBufferPerPool)) + buf_idx; + InstantGraphicsCommandBuffers[buffer_idx] = ZPushStructCtorArgs(Device->Arena, CommandBuffer, Device, InstantGraphicsPools[i]->Handle, InstantGraphicsPools[i]->QueueType, true); + } } - CommandBuffers.init(Device->Arena, TotalCommandBufferCount, TotalCommandBufferCount); - for (int i = 0; i < TotalCommandBufferCount; ++i) + for (uint32_t i = 0; i < TotalPoolCount; ++i) { - int pool_index = 
GetPoolFromIndex(Rendering::QueueType::GRAPHIC_QUEUE, i); - auto& pool = CommandPools[pool_index]; - CommandBuffers[i] = ZPushStructCtorArgs( - Device->Arena, - CommandBuffer, - Device, - pool->Handle, - pool->QueueType, - /*(i % MaxBufferPerPool) == 0 ? false : true */ false); + CommandPools[i] = ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, QueueType::GRAPHIC_QUEUE); + for (uint32_t buf_idx = 0; buf_idx < MaxBufferPerPool; ++buf_idx) + { + uint32_t buffer_idx = (i * MaxBufferPerPool) + buf_idx; + bool is_primary = (buffer_idx % 2) == 0; + CommandBuffers[buffer_idx] = ZPushStructCtorArgs(Device->Arena, CommandBuffer, Device, CommandPools[i]->Handle, CommandPools[i]->QueueType, is_primary); + } } if (Device->HasSeperateTransfertQueueFamily) { - TransferCommandPools.init(Device->Arena, m_total_pool_count, m_total_pool_count); - for (int i = 0; i < m_total_pool_count; ++i) + InstantTransferPools.init(Device->Arena, TotalPoolCount, TotalPoolCount); + TransferCommandPools.init(Device->Arena, TotalPoolCount, TotalPoolCount); + TransferCommandBuffers.init(Device->Arena, TotalCommandBufferCount, TotalCommandBufferCount); + InstantTransferCommandBuffers.init(Device->Arena, TotalInstantCommandBufferCount, TotalInstantCommandBufferCount); + + for (uint32_t i = 0; i < TotalPoolCount; ++i) { - TransferCommandPools[i] = ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, Rendering::QueueType::TRANSFER_QUEUE); + InstantTransferPools[i] = ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, Rendering::QueueType::TRANSFER_QUEUE); + for (uint32_t buf_idx = 0; buf_idx < (MaxBufferPerPool * MaxBufferPerPool); ++buf_idx) + { + uint32_t buffer_idx = (i * (MaxBufferPerPool * MaxBufferPerPool)) + buf_idx; + InstantTransferCommandBuffers[buffer_idx] = ZPushStructCtorArgs(Device->Arena, CommandBuffer, Device, InstantTransferPools[i]->Handle, InstantTransferPools[i]->QueueType, true); + } } - 
TransferCommandBuffers.init(Device->Arena, TotalCommandBufferCount, TotalCommandBufferCount); - for (int i = 0; i < TotalCommandBufferCount; ++i) + for (uint32_t i = 0; i < TotalPoolCount; ++i) { - int pool_index = GetPoolFromIndex(Rendering::QueueType::TRANSFER_QUEUE, i); - auto& pool = TransferCommandPools[pool_index]; - TransferCommandBuffers[i] = ZPushStructCtorArgs(Device->Arena, CommandBuffer, Device, pool->Handle, pool->QueueType, true); + TransferCommandPools[i] = ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, Rendering::QueueType::TRANSFER_QUEUE); + for (uint32_t buf_idx = 0; buf_idx < MaxBufferPerPool; ++buf_idx) + { + uint32_t buffer_idx = (i * MaxBufferPerPool) + buf_idx; + TransferCommandBuffers[buffer_idx] = ZPushStructCtorArgs(Device->Arena, CommandBuffer, Device, TransferCommandPools[i]->Handle, TransferCommandPools[i]->QueueType, true); + } } } @@ -2096,19 +1865,23 @@ namespace ZEngine::Hardwares void CommandBufferManager::Deinitialize() { - m_instant_semaphore = nullptr; - m_instant_fence = nullptr; + InstantGraphicsPools.clear(); + InstantGraphicsCommandBuffers.clear(); CommandBuffers.clear(); TransferCommandBuffers.clear(); + InstantTransferCommandBuffers.clear(); CommandPools.clear(); TransferCommandPools.clear(); - EnqueuedCommandbuffers.clear(); + InstantTransferPools.clear(); + + EnqueuedCommandBuffers.clear(); } - CommandBuffer* CommandBufferManager::GetCommandBuffer(uint8_t frame_index, bool begin) + CommandBuffer* CommandBufferManager::GetCommandBuffer(Rendering::QueueType type, uint8_t frame_index, uint8_t thread_index, uint8_t buffer_per_pool_index, bool begin) { - CommandBuffer* buffer = CommandBuffers[frame_index * MaxBufferPerPool]; + auto buffer_index = ((frame_index * TotalThreadCount) + thread_index) * MaxBufferPerPool + buffer_per_pool_index; + CommandBuffer* buffer = (type == Rendering::QueueType::TRANSFER_QUEUE && Device->HasSeperateTransfertQueueFamily) ? 
TransferCommandBuffers[buffer_index] : CommandBuffers[buffer_index]; if (begin) { @@ -2118,139 +1891,130 @@ namespace ZEngine::Hardwares return buffer; } - CommandBuffer* CommandBufferManager::GetInstantCommandBuffer(Rendering::QueueType type, uint8_t frame_index, bool begin) + CommandBuffer* CommandBufferManager::GetInstantCommandBuffer(Rendering::QueueType type, uint8_t frame_index, uint8_t thread_index, uint32_t buffer_per_pool_index, bool begin) { - CommandBuffer* buffer = (type == QueueType::TRANSFER_QUEUE && Device->HasSeperateTransfertQueueFamily) ? TransferCommandBuffers[frame_index] : CommandBuffers[(frame_index * MaxBufferPerPool) + 1]; - - std::unique_lock l(m_instant_command_mutex); - m_cond.wait(l, [this] { return m_instant_semaphore->GetState() == Primitives::SemaphoreState::Idle; }); - m_executing_instant_command = true; + // MaxBufferPerPool * MaxBufferPerPool is the total number of instant command buffers per pool + auto buffer_index = ((frame_index * TotalThreadCount) + thread_index) * (MaxBufferPerPool * MaxBufferPerPool) + buffer_per_pool_index; + CommandBuffer* buffer = (type == Rendering::QueueType::TRANSFER_QUEUE && Device->HasSeperateTransfertQueueFamily) ? InstantTransferCommandBuffers[buffer_index] : InstantGraphicsCommandBuffers[buffer_index]; if (begin) { buffer->ResetState(); + // Todo : We want to merge vkResetCommandBuffer with ResetState() when buffer is instant type + // vkResetCommandBuffer(buffer->GetHandle(), VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); buffer->Begin(); } return buffer; } - void CommandBufferManager::EndInstantCommandBuffer(CommandBuffer* const buffer, VulkanDevice* const device, int wait_flag) - { - buffer->End(); - - auto flag = buffer->QueueType == QueueType::GRAPHIC_QUEUE ? 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT : VK_PIPELINE_STAGE_TRANSFER_BIT; - - if (wait_flag != -1) - { - flag = VkPipelineStageFlagBits(wait_flag); - } - - device->QueueSubmit(flag, buffer, m_instant_semaphore, m_instant_fence); - { - std::unique_lock l(m_instant_command_mutex); - m_executing_instant_command = false; - } - - m_cond.notify_one(); - } - - Rendering::Pools::CommandPool* CommandBufferManager::GetCommandPool(Rendering::QueueType type, uint8_t frame_index) + Rendering::Pools::CommandPool* CommandBufferManager::GetCommandPool(Rendering::QueueType type, uint8_t frame_index, uint8_t thread_index) { - return (type == QueueType::TRANSFER_QUEUE && Device->HasSeperateTransfertQueueFamily) ? TransferCommandPools[frame_index] : CommandPools[frame_index]; + uint32_t pool_index = (frame_index * TotalThreadCount) + thread_index; + return (type == QueueType::TRANSFER_QUEUE && Device->HasSeperateTransfertQueueFamily) ? TransferCommandPools[pool_index] : CommandPools[pool_index]; } - int CommandBufferManager::GetPoolFromIndex(Rendering::QueueType type, uint8_t index) + Rendering::Pools::CommandPool* CommandBufferManager::GetInstantCommandPool(Rendering::QueueType type, uint8_t frame_index, uint8_t thread_index) { - return index / MaxBufferPerPool; + uint32_t pool_index = (frame_index * TotalThreadCount) + thread_index; + return (type == QueueType::TRANSFER_QUEUE && Device->HasSeperateTransfertQueueFamily) ? 
InstantTransferPools[pool_index] : InstantGraphicsPools[pool_index]; } - void CommandBufferManager::ResetPool(int frame_index) + void CommandBufferManager::ResetPool(uint8_t frame_index, uint8_t thread_index) { - vkResetCommandPool(Device->LogicalDevice, CommandPools[frame_index]->Handle, 0); + uint32_t pool_index = (frame_index * TotalThreadCount) + thread_index; + vkResetCommandPool(Device->LogicalDevice, CommandPools[pool_index]->Handle, 0); if (Device->HasSeperateTransfertQueueFamily) { - vkResetCommandPool(Device->LogicalDevice, TransferCommandPools[frame_index]->Handle, 0); + vkResetCommandPool(Device->LogicalDevice, TransferCommandPools[pool_index]->Handle, 0); } } void CommandBufferManager::ResetEnqueuedBufferIndex() { - for (int i = 0; i < EnqueuedCommandbufferIndex; ++i) + for (int i = 0; i < EnqueuedCommandBufferIndex && i < EnqueuedCommandBuffers.size(); ++i) { - EnqueuedCommandbuffers[i]->SetState(CommanBufferState::Pending); + if (EnqueuedCommandBuffers[i]) + { + EnqueuedCommandBuffers[i]->SetState(CommandBufferState::Pending); + } } - EnqueuedCommandbufferIndex = 0u; + EnqueuedCommandBufferIndex = 0u; } void CommandBufferManager::EndEnqueuedBuffers() { - for (int i = 0; i < EnqueuedCommandbufferIndex; ++i) + for (int i = 0; i < EnqueuedCommandBufferIndex; ++i) { - EnqueuedCommandbuffers[i]->End(); + EnqueuedCommandBuffers[i]->End(); } } void CommandBufferManager::EnqueueBuffer(CommandBufferPtr const buffer) { - EnqueuedCommandbuffers[EnqueuedCommandbufferIndex++] = buffer; + if (EnqueuedCommandBufferIndex < EnqueuedCommandBuffers.size()) + { + EnqueuedCommandBuffers[EnqueuedCommandBufferIndex++] = buffer; + return; + } + ZENGINE_CORE_ERROR("[!] 
Enqueued Command Buffer overflow detected") } void CommandBufferManager::IncreaseBuffers() { - m_total_pool_count = Device->SwapchainImageCount * m_thread_count; - TotalCommandBufferCount = m_total_pool_count * MaxBufferPerPool; - - if (TotalCommandBufferCount > EnqueuedCommandbuffers.size()) - { - auto size = EnqueuedCommandbuffers.size(); - for (uint32_t i = size; i < TotalCommandBufferCount; ++i) - { - EnqueuedCommandbuffers.push(nullptr); - } - } - - if (m_total_pool_count > CommandPools.size()) - { - auto size = CommandPools.size(); - for (uint32_t i = size; i < TotalCommandBufferCount; ++i) - { - CommandPools.push(ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, Rendering::QueueType::GRAPHIC_QUEUE)); - } - } - - if (TotalCommandBufferCount > CommandBuffers.size()) - { - auto size = CommandBuffers.size(); - for (uint32_t i = size; i < TotalCommandBufferCount; ++i) - { - int pool_index = GetPoolFromIndex(Rendering::QueueType::GRAPHIC_QUEUE, i); - auto& pool = CommandPools[pool_index]; - CommandBuffers.push(ZPushStructCtorArgs( - Device->Arena, - CommandBuffer, - Device, - pool->Handle, - pool->QueueType, - /*(i % MaxBufferPerPool) == 0 ? 
false : true */ false)); - } - } - - if (Device->HasSeperateTransfertQueueFamily) - { - auto size = TransferCommandPools.size(); - for (uint32_t i = size; i < TotalCommandBufferCount; ++i) - { - TransferCommandPools.push(ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, Rendering::QueueType::TRANSFER_QUEUE)); - } - - size = TransferCommandBuffers.size(); - for (uint32_t i = size; i < TotalCommandBufferCount; ++i) - { - int pool_index = GetPoolFromIndex(Rendering::QueueType::TRANSFER_QUEUE, i); - auto& pool = TransferCommandPools[pool_index]; - TransferCommandBuffers.push(ZPushStructCtorArgs(Device->Arena, CommandBuffer, Device, pool->Handle, pool->QueueType, true)); - } - } + // TotalPoolCount = Device->SwapchainImageCount * TotalThreadCount; + // TotalCommandBufferCount = TotalPoolCount * MaxBufferPerPool; + + // if (TotalCommandBufferCount > EnqueuedCommandBuffers.size()) + // { + // auto size = EnqueuedCommandBuffers.size(); + // for (uint32_t i = size; i < TotalCommandBufferCount; ++i) + // { + // EnqueuedCommandBuffers.push(nullptr); + // } + // } + + // if (TotalPoolCount > CommandPools.size()) + // { + // auto size = CommandPools.size(); + // for (uint32_t i = size; i < TotalCommandBufferCount; ++i) + // { + // CommandPools.push(ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, Rendering::QueueType::GRAPHIC_QUEUE)); + // } + // } + + // if (TotalCommandBufferCount > CommandBuffers.size()) + // { + // auto size = CommandBuffers.size(); + // for (uint32_t i = size; i < TotalCommandBufferCount; ++i) + // { + // int pool_index = GetPoolFromIndex(Rendering::QueueType::GRAPHIC_QUEUE, i); + // auto& pool = CommandPools[pool_index]; + // CommandBuffers.push(ZPushStructCtorArgs( + // Device->Arena, + // CommandBuffer, + // Device, + // pool->Handle, + // pool->QueueType, + // /*(i % MaxBufferPerPool) == 0 ? 
false : true */ false)); + // } + // } + + // if (Device->HasSeperateTransfertQueueFamily) + // { + // auto size = TransferCommandPools.size(); + // for (uint32_t i = size; i < TotalCommandBufferCount; ++i) + // { + // TransferCommandPools.push(ZPushStructCtorArgs(Device->Arena, Rendering::Pools::CommandPool, Device, Rendering::QueueType::TRANSFER_QUEUE)); + // } + + // size = TransferCommandBuffers.size(); + // for (uint32_t i = size; i < TotalCommandBufferCount; ++i) + // { + // int pool_index = GetPoolFromIndex(Rendering::QueueType::TRANSFER_QUEUE, i); + // auto& pool = TransferCommandPools[pool_index]; + // TransferCommandBuffers.push(ZPushStructCtorArgs(Device->Arena, CommandBuffer, Device, pool->Handle, pool->QueueType, true)); + // } + // } } bool CommandBufferManager::IsInitialized() const @@ -2283,7 +2047,7 @@ namespace ZEngine::Hardwares return m_device->CreateBuffer(static_cast(m_total_size), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT); } - void IndirectBuffer::Upload(const VkDrawIndirectCommand* data, size_t byte_size) + void IndirectBuffer::Upload(uint8_t frame_index, uint8_t thread_index, const VkDrawIndirectCommand* data, size_t byte_size) { if (byte_size == 0) { @@ -2291,12 +2055,12 @@ namespace ZEngine::Hardwares } CommandCount = byte_size / sizeof(VkDrawIndirectCommand); - IGraphicBuffer::Upload(data, byte_size); + IGraphicBuffer::Upload(frame_index, thread_index, data, byte_size); } - void IndirectBuffer::Write(const void* data, size_t byte_size) + void IndirectBuffer::Write(uint8_t frame_index, uint8_t thread_index, const void* data, size_t byte_size) { - IGraphicBuffer::Write(data, byte_size); + IGraphicBuffer::Write(frame_index, thread_index, data, byte_size); CommandCount = byte_size / sizeof(VkDrawIndirectCommand); } @@ -2403,13 +2167,13 @@ namespace ZEngine::Hardwares } } - void IGraphicBuffer::Clear() + void IGraphicBuffer::Clear(uint8_t frame_index, uint8_t 
thread_index) { m_current_offset = 0; - ClearRange(0, m_current_offset, m_total_size); + ClearRange(frame_index, thread_index, 0, m_current_offset, m_total_size); } - void IGraphicBuffer::ClearRange(uint8_t value, uint32_t offset, size_t byte_size) + void IGraphicBuffer::ClearRange(uint8_t frame_index, uint8_t thread_index, uint8_t value, uint32_t offset, size_t byte_size) { if (byte_size == 0) { @@ -2423,39 +2187,13 @@ namespace ZEngine::Hardwares return; } - VkMemoryPropertyFlags mem_prop_flags; - vmaGetAllocationMemoryProperties(m_device->VmaAllocatorValue, Buffer.Allocation, &mem_prop_flags); - - if (mem_prop_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - { - VmaAllocationInfo allocation_info = {}; - vmaGetAllocationInfo(m_device->VmaAllocatorValue, Buffer.Allocation, &allocation_info); - if (allocation_info.pMappedData) - { - auto mapped_buf = reinterpret_cast(allocation_info.pMappedData); - ZENGINE_VALIDATE_ASSERT(Helpers::secure_memset((mapped_buf + offset), value, allocation_info.size, byte_size) == Helpers::MEMORY_OP_SUCCESS, "Failed to perform memory copy operation") - } - } - else - { - BufferView staging_buffer = m_device->CreateBuffer(static_cast(byte_size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT); - - VmaAllocationInfo allocation_info = {}; - vmaGetAllocationInfo(m_device->VmaAllocatorValue, staging_buffer.Allocation, &allocation_info); - - if (allocation_info.pMappedData) - { - ZENGINE_VALIDATE_ASSERT(Helpers::secure_memset(allocation_info.pMappedData, value, allocation_info.size, byte_size) == Helpers::MEMORY_OP_SUCCESS, "Failed to perform memory copy operation") - ZENGINE_VALIDATE_ASSERT(vmaFlushAllocation(m_device->VmaAllocatorValue, staging_buffer.Allocation, 0, byte_size) == VK_SUCCESS, "Failed to flush allocation") - m_device->CopyBuffer(staging_buffer, Buffer, byte_size, 0u, offset); - } - - /* Cleanup resource */ - 
m_device->EnqueueBufferForDeletion(staging_buffer); - } + AsyncResourceLoader::UploadRequest request = { + .BufferUpload = {.Buffer = &Buffer, .Offset = offset, .ClearValue = value, .ByteSize = byte_size} + }; + m_device->AsyncResLoader->Submit(AsyncResourceLoader::UploadType::BUFFER_CLEAR, frame_index, thread_index, request); } - void IGraphicBuffer::UploadRange(const void* data, uint32_t offset, size_t byte_size) + void IGraphicBuffer::UploadRange(uint8_t frame_index, uint8_t thread_index, const void* data, uint32_t offset, size_t byte_size) { if (byte_size == 0) { @@ -2469,82 +2207,21 @@ namespace ZEngine::Hardwares return; } - VkMemoryPropertyFlags mem_prop_flags; - vmaGetAllocationMemoryProperties(m_device->VmaAllocatorValue, Buffer.Allocation, &mem_prop_flags); - - if (mem_prop_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - { - ZENGINE_VALIDATE_ASSERT(vmaCopyMemoryToAllocation(m_device->VmaAllocatorValue, data, Buffer.Allocation, offset, byte_size) == VK_SUCCESS, "Failed to perform memory copy operation") - - VkAccessFlags dst_access_mask = VK_ACCESS_NONE; - VkPipelineStageFlags dst_pipeline_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; - switch (Buffer.Type) - { - case BufferType::VERTEX: - dst_access_mask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - dst_pipeline_stage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - break; - - case BufferType::INDEX: - dst_access_mask = VK_ACCESS_INDEX_READ_BIT; - dst_pipeline_stage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - break; - - case BufferType::UNIFORM: - dst_access_mask = VK_ACCESS_UNIFORM_READ_BIT; - dst_pipeline_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; - break; - - case BufferType::STORAGE: - dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - dst_pipeline_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - - case BufferType::INDIRECT: - dst_access_mask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; - dst_pipeline_stage = 
VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; - break; - case UNKNOWN: - break; - } - - auto command_buffer = m_device->GetInstantCommandBuffer(Rendering::QueueType::GRAPHIC_QUEUE); - VkBufferMemoryBarrier bufMemBarrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER}; - bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - bufMemBarrier.dstAccessMask = dst_access_mask; - bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - bufMemBarrier.buffer = Buffer.Handle; - bufMemBarrier.offset = 0; - bufMemBarrier.size = VK_WHOLE_SIZE; - - // It's important to insert a buffer memory barrier here to ensure writing to the buffer has finished. - vkCmdPipelineBarrier(command_buffer->GetHandle(), VK_PIPELINE_STAGE_HOST_BIT, dst_pipeline_stage, 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); - - m_device->EnqueueInstantCommandBuffer(command_buffer, dst_pipeline_stage); - } - else - { - BufferView staging_buffer = m_device->CreateBuffer(static_cast(byte_size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT); - - ZENGINE_VALIDATE_ASSERT(vmaCopyMemoryToAllocation(m_device->VmaAllocatorValue, data, staging_buffer.Allocation, offset, byte_size) == VK_SUCCESS, "Failed to perform memory copy operation") - - m_device->CopyBuffer(staging_buffer, Buffer, byte_size, 0u, offset); - - /* Cleanup resource */ - m_device->EnqueueBufferForDeletion(staging_buffer); - } + AsyncResourceLoader::UploadRequest request = { + .BufferUpload = {.Buffer = &Buffer, .Data = data, .Offset = offset, .ByteSize = byte_size} + }; + m_device->AsyncResLoader->Submit(AsyncResourceLoader::UploadType::BUFFER, frame_index, thread_index, request); } - void IGraphicBuffer::Upload(const void* data, size_t byte_size) + void IGraphicBuffer::Upload(uint8_t frame_index, uint8_t thread_index, const void* data, size_t byte_size) { - UploadRange(data, m_current_offset, byte_size); + 
UploadRange(frame_index, thread_index, data, m_current_offset, byte_size); m_current_offset += byte_size; } - void IGraphicBuffer::Write(const void* data, size_t byte_size) + void IGraphicBuffer::Write(uint8_t frame_index, uint8_t thread_index, const void* data, size_t byte_size) { - UploadRange(data, 0, byte_size); + UploadRange(frame_index, thread_index, data, 0, byte_size); m_current_offset = byte_size; } @@ -2599,36 +2276,6 @@ namespace ZEngine::Hardwares return Rendering::Textures::TextureHandle{}; } - auto resource = GlobalTextures.Access(tex_handle); - - if (!resource) - { - return Rendering::Textures::TextureHandle{}; - } - - auto img_buf = Image2DBufferManager.Access(resource->BufferHandle); - auto image_buf_handle = img_buf->GetHandle(); - auto image_buf_aspect = (resource->Specification.Format == Specifications::ImageFormat::DEPTH_STENCIL_FROM_DEVICE) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; - - auto new_image_layout = Specifications::ImageLayout::TRANSFER_DST_OPTIMAL; - - Specifications::ImageMemoryBarrierSpecification barrier_spec_0 = {}; - barrier_spec_0.ImageHandle = image_buf_handle; - barrier_spec_0.OldLayout = img_buf->Layout; - barrier_spec_0.NewLayout = new_image_layout; - barrier_spec_0.ImageAspectMask = VkImageAspectFlagBits(image_buf_aspect); - barrier_spec_0.SourceAccessMask = VK_ACCESS_NONE; - barrier_spec_0.DestinationAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier_spec_0.SourceStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - barrier_spec_0.DestinationStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - barrier_spec_0.LayerCount = spec.LayerCount; - Primitives::ImageMemoryBarrier barrier_0{barrier_spec_0}; - - auto command_buf = GetInstantCommandBuffer(QueueType::GRAPHIC_QUEUE); - command_buf->TransitionImageLayout(barrier_0); - - img_buf->Layout = new_image_layout; - auto scratch = ZGetScratch(Arena); size_t data_size = width * height * byte_per_pixel; @@ -2648,28 +2295,14 @@ namespace ZEngine::Hardwares image_data[i + 3] = 
a_byte; } - WriteTextureData(command_buf, tex_handle, image_data.data()); + // todo : maybe we want to review how we handle threading here, for now we assume its creation on MainRenderThread + AsyncResourceLoader::UploadRequest request = { + .TextureUpload = {.Data = image_data.data(), .TexHandle = tex_handle} + }; + AsyncResLoader->Submit(AsyncResourceLoader::UploadType::TEXTURE_BUFFER, 0, 0, request); ZReleaseScratch(scratch); - new_image_layout = (image_buf_aspect & VK_IMAGE_ASPECT_DEPTH_BIT) ? Specifications::ImageLayout::DEPTH_STENCIL_ATTACHMENT_OPTIMAL : Specifications::ImageLayout::SHADER_READ_ONLY_OPTIMAL; - Specifications::ImageMemoryBarrierSpecification barrier_spec_1 = {}; - barrier_spec_1.ImageHandle = image_buf_handle; - barrier_spec_1.OldLayout = Specifications::ImageLayout::TRANSFER_DST_OPTIMAL; - barrier_spec_1.NewLayout = new_image_layout; - barrier_spec_1.ImageAspectMask = image_buf_aspect; - barrier_spec_1.SourceAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier_spec_1.DestinationAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier_spec_1.SourceStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - barrier_spec_1.DestinationStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - barrier_spec_1.LayerCount = spec.LayerCount; - Primitives::ImageMemoryBarrier barrier_1{barrier_spec_1}; - command_buf->TransitionImageLayout(barrier_1); - - EnqueueInstantCommandBuffer(command_buf); - - img_buf->Layout = new_image_layout; - return tex_handle; } @@ -2741,4 +2374,9 @@ namespace ZEngine::Hardwares return pass; } + void VulkanDevice::EnqueueAsyncGPUOperation(const AsyncGPUOperationHandle& operation) + { + AsyncGPUOperations.Enqueue(operation); + } + } // namespace ZEngine::Hardwares diff --git a/ZEngine/ZEngine/Hardwares/VulkanDevice.h b/ZEngine/ZEngine/Hardwares/VulkanDevice.h index 518b3618..1af3792c 100644 --- a/ZEngine/ZEngine/Hardwares/VulkanDevice.h +++ b/ZEngine/ZEngine/Hardwares/VulkanDevice.h @@ -2,6 +2,7 @@ #include #include // clang-format off +#include 
#include #include #include @@ -41,11 +42,18 @@ namespace ZEngine::Rendering::Shaders struct Shader; } +namespace ZEngine::Rendering::Renderers::Pipelines +{ + struct GraphicPipeline; +} + namespace ZEngine::Hardwares { struct WriteDescriptorSetRequestKey; struct WriteDescriptorSetRequest; struct CommandBufferManager; + struct AsyncGPUOperation; + struct AsyncGPUOperationHandle; /* * Vertex | Index | Uniform | Storage Buffers */ @@ -108,23 +116,23 @@ namespace ZEngine::Hardwares virtual BufferView CreateBuffer() = 0; virtual void Allocate(uint64_t size, const char* debug_name); - virtual void Clear(); - virtual void ClearRange(uint8_t value, uint32_t offset, size_t size); - virtual void UploadRange(const void* data, uint32_t offset, size_t size); - virtual void Upload(const void* data, size_t size); + virtual void Clear(uint8_t frame_index, uint8_t thread_index); + virtual void ClearRange(uint8_t frame_index, uint8_t thread_index, uint8_t value, uint32_t offset, size_t size); + virtual void UploadRange(uint8_t frame_index, uint8_t thread_index, const void* data, uint32_t offset, size_t size); + virtual void Upload(uint8_t frame_index, uint8_t thread_index, const void* data, size_t size); - virtual void Write(const void* data, size_t byte_size); + virtual void Write(uint8_t frame_index, uint8_t thread_index, const void* data, size_t byte_size); template - inline void Write(Core::Containers::ArrayView content) + inline void Write(uint8_t frame_index, uint8_t thread_index, Core::Containers::ArrayView content) { - Write(content.data(), content.size_bytes()); + Write(frame_index, thread_index, content.data(), content.size_bytes()); } template - inline void Upload(Core::Containers::ArrayView content) + inline void Upload(uint8_t frame_index, uint8_t thread_index, Core::Containers::ArrayView content) { - Upload(content.data(), content.size_bytes()); + Upload(frame_index, thread_index, content.data(), content.size_bytes()); } virtual void CleanUpMemory(); @@ -248,13 +256,13 
@@ namespace ZEngine::Hardwares virtual BufferView CreateBuffer() override; virtual void CleanUpMemory() override; - virtual void Upload(const VkDrawIndirectCommand* data, size_t byte_size); + virtual void Upload(uint8_t frame_index, uint8_t thread_index, const VkDrawIndirectCommand* data, size_t byte_size); - virtual void Write(const void* data, size_t byte_size) override; + virtual void Write(uint8_t frame_index, uint8_t thread_index, const void* data, size_t byte_size) override; - inline void Write(Core::Containers::ArrayView content) + inline void Write(uint8_t frame_index, uint8_t thread_index, Core::Containers::ArrayView content) { - Write(content.data(), content.size_bytes()); + Write(frame_index, thread_index, content.data(), content.size_bytes()); } virtual ~IndirectBuffer() {} @@ -417,7 +425,7 @@ namespace ZEngine::Hardwares /* * Command Buffer definition */ - enum CommanBufferState : uint8_t + enum CommandBufferState : uint8_t { Idle = 0, Recording, @@ -425,6 +433,11 @@ namespace ZEngine::Hardwares Pending, Invalid }; + enum CommandBufferType : uint8_t + { + Primary = 0, + Secondary + }; struct CommandBuffer { @@ -432,6 +445,7 @@ namespace ZEngine::Hardwares ~CommandBuffer(); Rendering::QueueType QueueType; + CommandBufferType BufferType = CommandBufferType::Primary; Hardwares::VulkanDevice* Device = nullptr; Core::Memory::ArenaAllocator LocalArena = {}; @@ -439,23 +453,25 @@ namespace ZEngine::Hardwares void Free(); VkCommandBuffer GetHandle() const; void Begin(); + void BeginSecondary(Rendering::Renderers::RenderPasses::RenderPass* const render_pass, VkFramebuffer framebuffer); void End(); bool Completed(); bool IsExecutable(); bool IsRecording(); - CommanBufferState GetState() const; + CommandBufferState GetState() const; void ResetState(); - void SetState(const CommanBufferState& state); + void SetState(const CommandBufferState& state); void SetSignalFence(Rendering::Primitives::Fence* const semaphore); void 
SetSignalSemaphore(Rendering::Primitives::Semaphore* const semaphore); Rendering::Primitives::Semaphore* GetSignalSemaphore() const; Rendering::Primitives::Fence* GetSignalFence(); void ClearColor(float r, float g, float b, float a); void ClearDepth(float depth_color, uint32_t stencil); - void BeginRenderPass(Rendering::Renderers::RenderPasses::RenderPass* const, VkFramebuffer framebuffer); + void BeginRenderPass(Rendering::Renderers::RenderPasses::RenderPass* const, VkFramebuffer framebuffer, bool is_content_secondary_command_buffer); void EndRenderPass(); void BindDescriptorSets(uint32_t frame_index = 0); void BindDescriptorSet(const VkDescriptorSet& descriptor); + void BindPipeline(Rendering::Specifications::PipelineBindPoint bind_point, Rendering::Renderers::Pipelines::GraphicPipeline* const pipeline); void DrawIndirect(const Hardwares::IndirectBuffer& buffer); void DrawIndexedIndirect(const Hardwares::IndirectBuffer& buffer, uint32_t count); void DrawIndexed(uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); @@ -464,11 +480,13 @@ namespace ZEngine::Hardwares void CopyBufferToImage(const Hardwares::BufferView& source, Hardwares::BufferImage& destination, uint32_t width, uint32_t height, uint32_t layer_count, VkImageLayout new_layout); void BindVertexBuffer(Hardwares::VertexBuffer& buffer); void BindIndexBuffer(const Hardwares::IndexBuffer& buffer, VkIndexType type); - void SetScissor(const VkRect2D& scissor); + void SetScissor(uint32_t w, uint32_t h, int32_t x = 0, int32_t y = 0); + void SetViewport(uint32_t w, uint32_t h, float x = 0.0f, float y = 0.0f, float min_depth = 0.0f, float max_depth = 1.0f); void PushConstants(VkShaderStageFlags stage_flags, uint32_t offset, uint32_t size, const void* data); + void ExecuteSecondaryCommandBuffers(Core::Containers::ArrayView buffers); private: - std::atomic_uint8_t m_command_buffer_state{CommanBufferState::Idle}; + std::atomic_uint8_t 
m_command_buffer_state{CommandBufferState::Idle}; VkCommandBuffer m_command_buffer{VK_NULL_HANDLE}; VkCommandPool m_command_pool{VK_NULL_HANDLE}; VkClearValue m_clear_value[2] = {0}; @@ -481,40 +499,44 @@ namespace ZEngine::Hardwares struct CommandBufferManager { - void Initialize(VulkanDevice* device, int thread_count = 1); + struct InstantCommandBufferInfo; + + void Initialize(VulkanDevice* device, uint32_t image_count = 0, uint8_t override_thread_count = 0); void Deinitialize(); - CommandBuffer* GetCommandBuffer(uint8_t frame_index, bool begin = true); - CommandBuffer* GetInstantCommandBuffer(Rendering::QueueType type, uint8_t frame_index, bool begin = true); - void EndInstantCommandBuffer(CommandBuffer* const buffer, VulkanDevice* const device, int wait_flag = -1); - Rendering::Pools::CommandPool* GetCommandPool(Rendering::QueueType type, uint8_t frame_index); - int GetPoolFromIndex(Rendering::QueueType type, uint8_t index); - void ResetPool(int frame_index); + CommandBuffer* GetCommandBuffer(Rendering::QueueType type, uint8_t frame_index, uint8_t thread_index, uint8_t buffer_per_pool_index, bool begin = true); + CommandBuffer* GetInstantCommandBuffer(Rendering::QueueType type, uint8_t frame_index, uint8_t thread_index, uint32_t buffer_per_pool_index, bool begin = true); + Rendering::Pools::CommandPool* GetCommandPool(Rendering::QueueType type, uint8_t frame_index, uint8_t thread_index); + Rendering::Pools::CommandPool* GetInstantCommandPool(Rendering::QueueType type, uint8_t frame_index, uint8_t thread_index); + void ResetPool(uint8_t frame_index, uint8_t thread_index); void IncreaseBuffers(); void EnqueueBuffer(CommandBufferPtr const buffer); void EndEnqueuedBuffers(); void ResetEnqueuedBufferIndex(); bool IsInitialized() const; - VulkanDevice* Device = nullptr; - const int MaxBufferPerPool = 4; - Core::Containers::Array CommandPools = {}; - Core::Containers::Array TransferCommandPools = {}; - Core::Containers::Array CommandBuffers = {}; - 
Core::Containers::Array TransferCommandBuffers = {}; - int TotalCommandBufferCount = 0; - uint32_t EnqueuedCommandbufferIndex = 0; - Core::Containers::Array EnqueuedCommandbuffers = {}; + uint32_t TotalCommandBufferCount = 0; + uint32_t TotalInstantCommandBufferCount = 0; + uint32_t TotalPoolCount = 0; + uint32_t TotalThreadCount = 0; + uint32_t EnqueuedCommandBufferIndex = 0; + const uint32_t MaxBufferPerPool = 4; + VulkanDevice* Device = nullptr; + + Core::Containers::Array InstantGraphicsPools = {}; + Core::Containers::Array InstantTransferPools = {}; + Core::Containers::Array InstantGraphicsCommandBuffers = {}; + Core::Containers::Array InstantTransferCommandBuffers = {}; + + Core::Containers::Array CommandPools = {}; + Core::Containers::Array TransferCommandPools = {}; + Core::Containers::Array CommandBuffers = {}; + Core::Containers::Array TransferCommandBuffers = {}; + Core::Containers::Array EnqueuedCommandBuffers = {}; private: - bool m_is_initialized = false; - int m_total_pool_count = 0; - int m_thread_count = 1; - std::condition_variable m_cond; - std::atomic_bool m_executing_instant_command{false}; - std::mutex m_instant_command_mutex; - ZRawPtr(Rendering::Primitives::Semaphore) m_instant_semaphore; - ZRawPtr(Rendering::Primitives::Fence) m_instant_fence; + bool m_is_initialized = false; }; + ZDEFINE_PTR(CommandBufferManager); struct WriteDescriptorSetRequestKey { @@ -541,6 +563,24 @@ namespace ZEngine::Hardwares VkDescriptorType DescriptorType; }; + /* + * Async GPU operation handle and definition + */ + struct AsyncGPUOperationHandle + { + uint32_t StageFlags = 0; + uint64_t SignalValue = 0; + Rendering::Primitives::Semaphore* Timeline = nullptr; + }; + + struct AsyncGPUOperation + { + uint64_t NextValue = 0; + Rendering::Primitives::Semaphore* Timeline = nullptr; + Core::Containers::Array RetireValues = {}; + + void Initialize(VulkanDevice* device, uint32_t total_buffer_count); + }; /* * Device definition */ @@ -549,16 +589,10 @@ namespace 
ZEngine::Hardwares bool HasSeperateTransfertQueueFamily = false; bool PhysicalDeviceSupportSampledImageBindless = false; bool PhysicalDeviceSupportStorageBufferBindless = false; + bool PhysicalDeviceSupportTimelineSemaphore = false; const char* ApplicationName = "Tetragrama"; const char* EngineName = "ZEngine"; - uint32_t SwapchainImageCount = 3; - uint32_t PreviousSwapchainImageCount = 3; - uint32_t SwapchainImageCountChangeCount = 0; - uint32_t SwapchainImageIndex = std::numeric_limits::max(); - uint32_t CurrentFrameIndex = std::numeric_limits::max(); - uint32_t PreviousFrameIndex = std::numeric_limits::max(); - uint32_t SwapchainImageWidth = std::numeric_limits::max(); - uint32_t SwapchainImageHeight = std::numeric_limits::max(); + uint32_t WorkerThreadCount = 1; uint32_t GraphicFamilyIndex = std::numeric_limits::max(); uint32_t TransferFamilyIndex = std::numeric_limits::max(); @@ -568,22 +602,19 @@ namespace ZEngine::Hardwares VkSurfaceKHR Surface = VK_NULL_HANDLE; VkSurfaceFormatKHR SurfaceFormat = {}; VkPresentModeKHR PresentMode = {}; - VkPhysicalDeviceProperties PhysicalDeviceProperties = {}; - VkPhysicalDeviceDescriptorIndexingProperties PhysicalDeviceDescriptorIndexingProperties = {}; + VkPhysicalDeviceProperties2 PhysicalDeviceProperties = {}; + VkPhysicalDeviceVulkan12Properties PhysicalDeviceVulkan12Properties = {}; VkDevice LogicalDevice = VK_NULL_HANDLE; VkPhysicalDevice PhysicalDevice = VK_NULL_HANDLE; VkPhysicalDeviceFeatures2 PhysicalDeviceFeature = {}; VkPhysicalDeviceMemoryProperties PhysicalDeviceMemoryProperties = {}; - VkSwapchainKHR SwapchainHandle = VK_NULL_HANDLE; + VkSampler GlobalLinearWrapSampler = VK_NULL_HANDLE; VkDescriptorPool GlobalDescriptorPoolHandle = VK_NULL_HANDLE; VmaAllocator VmaAllocatorValue = nullptr; + VkDescriptorImageInfo GlobalLinearWrapSamplerImageInfo = {}; + CommandBufferManagerPtr CommandBufferMgr = {}; + DeviceSwapchainPtr SwapchainPtr = {}; Core::Containers::Array DefaultDepthFormats = {}; - 
Rendering::Renderers::RenderPasses::Attachment* SwapchainAttachment = {}; - Core::Containers::Array SwapchainImageViews = {}; - Core::Containers::Array SwapchainFramebuffers = {}; - Core::Containers::Array SwapchainAcquiredSemaphores = {}; - Core::Containers::Array SwapchainRenderCompleteSemaphores = {}; - Core::Containers::Array SwapchainSignalFences = {}; Core::Containers::HashMap> ShaderCaches = {}; Core::Containers::HashMap> ShaderReservedDescriptorSetMap = {}; //> @@ -595,6 +626,7 @@ namespace ZEngine::Hardwares Helpers::HandleManager Image2DBufferManager = {}; Helpers::ThreadSafeQueue TextureHandleToUpdates = {}; Helpers::ThreadSafeQueue TextureHandleToDispose = {}; + Helpers::ThreadSafeQueue AsyncGPUOperations = {}; Helpers::HandleManager ShaderManager = {}; Helpers::HandleManager VertexBufferSetManager = {}; Helpers::HandleManager StorageBufferSetManager = {}; @@ -604,21 +636,18 @@ namespace ZEngine::Hardwares Helpers::HandleManager DirtyResources = {}; Helpers::HandleManager DirtyBuffers = {}; Helpers::HandleManager DirtyBufferImages = {}; - std::atomic_bool RunningDirtyCollector = true; - std::atomic_uint IdleFrameCount = 0; - std::atomic_uint IdleFrameThreshold = SwapchainImageCount * 3 * 3; - std::condition_variable DirtyCollectorCond = {}; - std::mutex DirtyMutex = {}; + std::atomic_bool RunningDirtyCollector = {}; std::mutex Mutex = {}; Windows::CoreWindow* CurrentWindow = nullptr; ZEngine::Core::Memory::ArenaAllocator* Arena = nullptr; AsyncResourceLoaderPtr AsyncResLoader = nullptr; - void Initialize(ZEngine::Core::Memory::ArenaAllocator* arena, Windows::CoreWindow* const window); + void Initialize(ZEngine::Core::Memory::ArenaAllocator* arena, Windows::CoreWindow* const window, uint32_t worker_thread_count); void Deinitialize(); - void Update(); void Dispose(); + void QueueSubmit(CommandBuffer* const command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore, uint32_t wait_flag, uint64_t signal_value, uint64_t wait_value, 
Rendering::Primitives::Semaphore* const wait_timeline); bool QueueSubmit(const VkPipelineStageFlags wait_stage_flag, CommandBuffer* const command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore = nullptr, Rendering::Primitives::Fence* const fence = nullptr); + void EnqueueAsyncGPUOperation(const AsyncGPUOperationHandle& handle); void EnqueueForDeletion(Rendering::DeviceResourceType resource_type, void* const resource_handle); void EnqueueForDeletion(Rendering::DeviceResourceType resource_type, DirtyResource resource); void EnqueueBufferForDeletion(BufferView& buffer); @@ -628,9 +657,8 @@ namespace ZEngine::Hardwares void QueueWaitAll(); void MapAndCopyToMemory(BufferView& buffer, size_t data_size, const void* data); BufferView CreateBuffer(VkDeviceSize byte_size, VkBufferUsageFlags buffer_usage, VmaAllocationCreateFlags vma_create_flags = 0); - void CopyBuffer(const BufferView& source, const BufferView& destination, VkDeviceSize byte_size, VkDeviceSize src_buffer_offset = 0u, VkDeviceSize dst_buffer_offset = 0u); + VkPipelineStageFlags CopyBuffer(CommandBuffer* const command_buffer, const BufferView& source, const BufferView& destination, VkDeviceSize byte_size, VkDeviceSize src_buffer_offset = 0u, VkDeviceSize dst_buffer_offset = 0u); BufferImage CreateImage(uint32_t width, uint32_t height, VkImageType image_type, VkImageViewType image_view_type, VkFormat image_format, VkImageTiling image_tiling, VkImageLayout image_initial_layout, VkImageUsageFlags image_usage, VkSharingMode image_sharing_mode, VkSampleCountFlagBits image_sample_count, VkMemoryPropertyFlags requested_properties, VkImageAspectFlagBits image_aspect_flag, uint32_t layer_count = 1U, VkImageCreateFlags image_create_flag_bit = 0); - VkSampler CreateImageSampler(); VkFormat FindSupportedFormat(Core::Containers::ArrayView format_collection, VkImageTiling image_tiling, VkFormatFeatureFlags feature_flags); VkFormat FindDepthFormat(); VkImageView CreateImageView(VkImage image, VkFormat 
image_format, VkImageViewType image_view_type, VkImageAspectFlagBits image_aspect_flag, uint32_t layer_count = 1U); @@ -640,16 +668,6 @@ namespace ZEngine::Hardwares IndirectBufferSetHandle CreateIndirectBufferSet(); IndexBufferSetHandle CreateIndexBufferSet(); UniformBufferSetHandle CreateUniformBufferSet(); - void CreateSwapchain(); - void ResizeSwapchain(); - void DisposeSwapchain(); - void NewFrame(); - void Present(); - void IncrementFrameImageCount(); - CommandBuffer* GetCommandBuffer(bool begin = true); - CommandBuffer* GetInstantCommandBuffer(Rendering::QueueType type, bool begin = true); - void EnqueueInstantCommandBuffer(CommandBuffer* const buffer, int wait_flag = -1); - void EnqueueCommandBuffer(CommandBuffer* const buffer); void DirtyCollector(); Helpers::Handle CompileShader(Rendering::Specifications::ShaderSpecification& spec); @@ -662,9 +680,8 @@ namespace ZEngine::Hardwares Rendering::Renderers::RenderPasses::RenderPass* CreateRenderPass(const Rendering::Specifications::RenderPassSpecification& spec); private: - VulkanLayer m_layer = {}; - CommandBufferManager m_buffer_manager = {}; - Core::Containers::HashMap m_queue_map = {}; + VulkanLayer m_layer = {}; + Core::Containers::HashMap m_queue_map = {}; VkDebugUtilsMessengerEXT m_debug_messenger{VK_NULL_HANDLE}; PFN_vkCreateDebugUtilsMessengerEXT __createDebugMessengerPtr{VK_NULL_HANDLE}; PFN_vkDestroyDebugUtilsMessengerEXT __destroyDebugMessengerPtr{VK_NULL_HANDLE}; diff --git a/ZEngine/ZEngine/Helpers/ThreadPool.cpp b/ZEngine/ZEngine/Helpers/ThreadPool.cpp index b7e79922..06ffcaf3 100644 --- a/ZEngine/ZEngine/Helpers/ThreadPool.cpp +++ b/ZEngine/ZEngine/Helpers/ThreadPool.cpp @@ -2,5 +2,5 @@ namespace ZEngine::Helpers { - Scope ThreadPoolHelper::m_threadPool = CreateScope(); + Scope ThreadPoolHelper::Pool = CreateScope(); } diff --git a/ZEngine/ZEngine/Helpers/ThreadPool.h b/ZEngine/ZEngine/Helpers/ThreadPool.h index b246d60b..af1eaa71 100644 --- a/ZEngine/ZEngine/Helpers/ThreadPool.h +++ 
b/ZEngine/ZEngine/Helpers/ThreadPool.h @@ -7,10 +7,16 @@ namespace ZEngine::Helpers { - class ThreadPool + struct ThreadPool { - public: - ThreadPool(size_t maxThreadCount = std::thread::hardware_concurrency()) : m_maxThreadCount(maxThreadCount), m_taskQueue(CreateRef>>()) {} + size_t MaxThreadCount = 0; + size_t CurrentThreadCount = 0; + size_t ReservedThreadCount = 1; + + ThreadPool(size_t maxThreadCount = std::thread::hardware_concurrency()) : MaxThreadCount(maxThreadCount), m_taskQueue(CreateRef>>()) + { + MaxThreadCount -= ReservedThreadCount; + } ~ThreadPool() { @@ -33,8 +39,6 @@ namespace ZEngine::Helpers } private: - size_t m_maxThreadCount; - size_t m_currentThreadCount{0}; std::atomic_bool m_cancellationToken{false}; std::mutex m_mutex; Ref>> m_taskQueue; @@ -45,7 +49,8 @@ namespace ZEngine::Helpers { queue->Wait(cancellationToken); - if (cancellationToken == true) + auto op_canceled = cancellationToken.load(std::memory_order_relaxed); + if (op_canceled == true) { break; } @@ -55,6 +60,7 @@ namespace ZEngine::Helpers { continue; } + task(); } } @@ -63,10 +69,10 @@ namespace ZEngine::Helpers { { std::unique_lock lock(m_mutex); - if (m_currentThreadCount < m_maxThreadCount) + if (CurrentThreadCount < MaxThreadCount) { std::thread(ThreadPool::WorkerThread, m_taskQueue.Weak(), std::cref(m_cancellationToken)).detach(); - m_currentThreadCount++; + CurrentThreadCount++; } } } @@ -74,20 +80,24 @@ namespace ZEngine::Helpers struct ThreadPoolHelper { - template - static void Submit(T&& f) + static Scope Pool; + + static void Initialize() { - if (!m_threadPool) + if (!Pool) { - m_threadPool = CreateScope(); + Pool = CreateScope(); } - m_threadPool->Enqueue(std::move(f)); + } + + template + static void Submit(T&& f) + { + Pool->Enqueue(std::move(f)); } private: ThreadPoolHelper() = delete; ~ThreadPoolHelper() = delete; - - static Scope m_threadPool; }; } // namespace ZEngine::Helpers \ No newline at end of file diff --git a/ZEngine/ZEngine/Managers/AssetManager.cpp 
b/ZEngine/ZEngine/Managers/AssetManager.cpp index b0009985..a6c960fd 100644 --- a/ZEngine/ZEngine/Managers/AssetManager.cpp +++ b/ZEngine/ZEngine/Managers/AssetManager.cpp @@ -193,7 +193,7 @@ namespace ZEngine::Managers new_tex.TextureUUID = gen(); const auto tex_absolute_path = absolute ? std::string(file) : fmt::format("{0}{1}{2}", s_Instance->CurrentWorkingSpacePath, PLATFORM_OS_BACKSLASH, file); - new_tex.Handle = s_Instance->Device->AsyncResLoader->LoadTextureFile(tex_absolute_path.c_str()); + new_tex.Handle = s_Instance->Device->AsyncResLoader->Submit(0, 0, {.TextureUpload = {.Filename = tex_absolute_path.c_str()}}); new_tex.Path.init(&(s_Instance->Arena), file); RegisterAsset(AssetType::TEXTURE, new_tex.TextureUUID, asset_id); @@ -313,7 +313,7 @@ namespace ZEngine::Managers new_tex.TextureUUID = tex.TextureUUID; const auto tex_absolute_path = fmt::format("{0}{1}{2}", s_Instance->CurrentWorkingSpacePath, PLATFORM_OS_BACKSLASH, tex.Path.c_str()); - new_tex.Handle = s_Instance->Device->AsyncResLoader->LoadTextureFile(tex_absolute_path.c_str()); + new_tex.Handle = s_Instance->Device->AsyncResLoader->Submit(0, 0, {.TextureUpload = {.Filename = tex_absolute_path.c_str()}}); new_tex.Path.init(&(s_Instance->Arena), tex.Path.c_str()); RegisterAsset(AssetType::TEXTURE, new_tex.TextureUUID, asset_id); diff --git a/ZEngine/ZEngine/Rendering/Primitives/Fence.h b/ZEngine/ZEngine/Rendering/Primitives/Fence.h index 42bd7be7..0fb1a7cc 100644 --- a/ZEngine/ZEngine/Rendering/Primitives/Fence.h +++ b/ZEngine/ZEngine/Rendering/Primitives/Fence.h @@ -36,4 +36,4 @@ namespace ZEngine::Rendering::Primitives FenceState m_fence_state{FenceState::Idle}; VkFence m_handle{VK_NULL_HANDLE}; }; -} // namespace ZEngine::Rendering::Primitives \ No newline at end of file +} // namespace ZEngine::Rendering::Primitives diff --git a/ZEngine/ZEngine/Rendering/Primitives/Semaphore.cpp b/ZEngine/ZEngine/Rendering/Primitives/Semaphore.cpp index d2d1c843..0272a0c6 100644 --- 
a/ZEngine/ZEngine/Rendering/Primitives/Semaphore.cpp +++ b/ZEngine/ZEngine/Rendering/Primitives/Semaphore.cpp @@ -3,11 +3,20 @@ namespace ZEngine::Rendering::Primitives { - Semaphore::Semaphore(Hardwares::VulkanDevice* const device) + Semaphore::Semaphore(Hardwares::VulkanDevice* const device, bool is_timeline) { - Device = device; - VkSemaphoreCreateInfo semaphore_create_info = {}; - semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + IsTimeline = is_timeline; + Device = device; + VkSemaphoreTypeCreateInfo timeline_create_info = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO}; + + VkSemaphoreCreateInfo semaphore_create_info = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; + if (is_timeline) + { + timeline_create_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE; + timeline_create_info.initialValue = 0; + semaphore_create_info.pNext = &timeline_create_info; + } + ZENGINE_VALIDATE_ASSERT(vkCreateSemaphore(Device->LogicalDevice, &semaphore_create_info, nullptr, &m_handle) == VK_SUCCESS, "Failed to create Semaphore") } @@ -26,12 +35,33 @@ namespace ZEngine::Rendering::Primitives void Semaphore::Wait(const uint64_t value, const uint64_t timeout) { - /*No-Op for now, because it's for timeline semaphore*/ + if (!IsTimeline) + { + return; + } + + VkSemaphoreWaitInfo wait_info = {}; + wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO; + wait_info.semaphoreCount = 1; + wait_info.pSemaphores = &m_handle; + wait_info.pValues = &value; + + ZENGINE_VALIDATE_ASSERT(vkWaitSemaphores(Device->LogicalDevice, &wait_info, timeout) == VK_SUCCESS, "Failed to wait on Semaphore"); } void Semaphore::Signal(const uint64_t value) { - /*No-Op for now, because it's for timeline semaphore*/ + if (!IsTimeline) + { + return; + } + + VkSemaphoreSignalInfo signal_info = {}; + signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO; + signal_info.semaphore = m_handle; + signal_info.value = value; + + 
ZENGINE_VALIDATE_ASSERT(vkSignalSemaphore(Device->LogicalDevice, &signal_info) == VK_SUCCESS, "Failed to signal Semaphore"); } VkSemaphore Semaphore::GetHandle() const diff --git a/ZEngine/ZEngine/Rendering/Primitives/Semaphore.h b/ZEngine/ZEngine/Rendering/Primitives/Semaphore.h index fe03e6d3..9e1acbe6 100644 --- a/ZEngine/ZEngine/Rendering/Primitives/Semaphore.h +++ b/ZEngine/ZEngine/Rendering/Primitives/Semaphore.h @@ -18,10 +18,11 @@ namespace ZEngine::Rendering::Primitives struct Semaphore { - Semaphore(Hardwares::VulkanDevice* const device); + Semaphore(Hardwares::VulkanDevice* const device, bool is_timeline = false); ~Semaphore(); - Hardwares::VulkanDevice* Device = nullptr; + bool IsTimeline = false; + Hardwares::VulkanDevice* Device = nullptr; void Wait(const uint64_t value, const uint64_t timeout = UINT64_MAX); void Signal(const uint64_t value); VkSemaphore GetHandle() const; diff --git a/ZEngine/ZEngine/Rendering/Renderers/GraphicRenderer.cpp b/ZEngine/ZEngine/Rendering/Renderers/GraphicRenderer.cpp index 7523cf25..395a252b 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/GraphicRenderer.cpp +++ b/ZEngine/ZEngine/Rendering/Renderers/GraphicRenderer.cpp @@ -42,7 +42,7 @@ namespace ZEngine::Rendering::Renderers auto material_buffer_set = Device->StorageBufferSetManager.Access(RenderSceneData->MaterialBufferHandle); auto indirect_buffer_set = Device->IndirectBufferSetManager.Access(RenderSceneData->IndirectBufferHandle); - for (int i = 0; i < Device->SwapchainImageCount; ++i) + for (int i = 0; i < Device->SwapchainPtr->BufferredFrameCount; ++i) { scene_camera->At(i)->Allocate(sizeof(UBOCameraLayout), RendererResourceName::SceneCameraBufferName); @@ -57,6 +57,7 @@ namespace ZEngine::Rendering::Renderers /* * Renderer Passes */ + auto upload_pass = ZPushStructCtor(Device->Arena, UploadPass); auto initial_pass = ZPushStructCtor(Device->Arena, InitialPass); auto scene_depth_prepass = ZPushStructCtor(Device->Arena, DepthPrePass); auto skybox_pass = 
ZPushStructCtor(Device->Arena, SkyboxPass); @@ -80,9 +81,10 @@ namespace ZEngine::Rendering::Renderers RenderGraph->ResourceBuilder->CreateBufferSet("g_scene_point_light_buffer"); RenderGraph->ResourceBuilder->CreateBufferSet("g_scene_spot_light_buffer"); - RenderGraph->AddCallbackPass("Initial Pass", initial_pass); + RenderGraph->AddCallbackPass("Upload Pass", upload_pass); + // RenderGraph->AddCallbackPass("Initial Pass", initial_pass); RenderGraph->AddCallbackPass("Depth Pre-Pass", scene_depth_prepass); - // RenderGraph->AddCallbackPass("Skybox Pass", skybox_pass); + RenderGraph->AddCallbackPass("Skybox Pass", skybox_pass); RenderGraph->AddCallbackPass("Grid Pass", grid_pass); // RenderGraph->AddCallbackPass("G-Buffer Pass", gbuffer_pass); // RenderGraph->AddCallbackPass("Lighting Pass", lighting_pass); @@ -98,7 +100,7 @@ namespace ZEngine::Rendering::Renderers Device->GlobalTextures.Remove(FrameDepthRenderTarget); } - void GraphicRenderer::DrawScene(Hardwares::CommandBufferPtr const cb, Cameras::CameraPtr const camera) + void GraphicRenderer::DrawScene(uint8_t frame_index, uint8_t thread_index, Hardwares::CommandBufferPtr const cb, Cameras::CameraPtr const camera) { auto asset_manager = Managers::AssetManager::Instance(); auto ubo_camera_data = UBOCameraLayout{.View = camera->GetViewMatrix(), .Projection = camera->GetPerspectiveMatrix(), .Position = Vec4f(camera->GetPosition(), 1.0f)}; @@ -106,12 +108,13 @@ namespace ZEngine::Rendering::Renderers auto material_buffer_set = Device->StorageBufferSetManager.Access(RenderSceneData->MaterialBufferHandle); auto camera_buffer_set = Device->UniformBufferSetManager.Access(RenderSceneData->SceneCameraBufferHandle); - auto camera_buf = camera_buffer_set->At(Device->CurrentFrameIndex); - auto material_buffer = material_buffer_set->At(Device->CurrentFrameIndex); + auto camera_buf = camera_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index); + auto material_buffer = 
material_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index); - material_buffer->Write(ArrayView{asset_manager->GPUMeshMaterials}); - camera_buf->Write(reinterpret_cast(&ubo_camera_data), sizeof(UBOCameraLayout)); + material_buffer->Write(frame_index, thread_index, ArrayView{asset_manager->GPUMeshMaterials}); + camera_buf->Write(frame_index, thread_index, reinterpret_cast(&ubo_camera_data), sizeof(UBOCameraLayout)); + // todo : expand F, T to the render graph RenderGraph->Execute(cb); } diff --git a/ZEngine/ZEngine/Rendering/Renderers/GraphicRenderer.h b/ZEngine/ZEngine/Rendering/Renderers/GraphicRenderer.h index 925e5a20..9361b9a1 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/GraphicRenderer.h +++ b/ZEngine/ZEngine/Rendering/Renderers/GraphicRenderer.h @@ -21,7 +21,7 @@ namespace ZEngine::Rendering::Renderers void Initialize(Hardwares::VulkanDevicePtr device) override; void Deinitialize() override; - void DrawScene(Hardwares::CommandBufferPtr const cb, Cameras::CameraPtr const camera); + void DrawScene(uint8_t frame_index, uint8_t thread_index, Hardwares::CommandBufferPtr const cb, Cameras::CameraPtr const camera); Textures::TextureHandle GetFrameOutput(); }; ZDEFINE_PTR(GraphicRenderer); diff --git a/ZEngine/ZEngine/Rendering/Renderers/IRenderer.h b/ZEngine/ZEngine/Rendering/Renderers/IRenderer.h index 5914c062..9cdd365b 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/IRenderer.h +++ b/ZEngine/ZEngine/Rendering/Renderers/IRenderer.h @@ -3,6 +3,49 @@ namespace ZEngine::Rendering::Renderers { + struct ScissorCmd + { + uint32_t w = 0; + uint32_t h = 0; + int32_t x = 0; + int32_t y = 0; + }; + struct IndexedCmd + { + uint32_t IdxCount = 0; + uint32_t InstanceCount = 0; + uint32_t FirstIndex = 0; + int32_t VertexOffset = 0; + uint32_t FirstInstance = 0; + }; + + struct UIDrawVert + { + typedef struct _vec2 + { + float x, y; + } vec2; + vec2 pos; + vec2 uv; + unsigned int col; + }; + + struct RenderOverlayPayload + { + bool IsIndexBufferUint16 = false; + 
uint32_t VertexCount = 0; + uint32_t IndexCount = 0; + uint32_t DrawDataIndex = 0; + float Pc[4] = {0.0f}; // {Scale}-{Translate} + Hardwares::VertexBufferSetHandle VBHandle = {}; + Hardwares::IndexBufferSetHandle IdxBHandle = {}; + std::vector TextureIds = {}; + std::vector IndexData = {}; + std::vector VertexData = {}; + std::vector ScissorCmds = {}; + std::vector IndexedCmds = {}; + }; + struct RendererResourceName { inline static cstring FrameDepthRenderTargetName = "g_frame_depth_render_target"; @@ -22,4 +65,4 @@ namespace ZEngine::Rendering::Renderers virtual void Initialize(Hardwares::VulkanDevicePtr device) = 0; virtual void Deinitialize() = 0; }; -} // namespace ZEngine::Rendering::Renderers \ No newline at end of file +} // namespace ZEngine::Rendering::Renderers diff --git a/ZEngine/ZEngine/Rendering/Renderers/ImGUIRenderer.cpp b/ZEngine/ZEngine/Rendering/Renderers/ImGUIRenderer.cpp index c49a911c..aa0cb294 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/ImGUIRenderer.cpp +++ b/ZEngine/ZEngine/Rendering/Renderers/ImGUIRenderer.cpp @@ -44,7 +44,7 @@ namespace ZEngine::Rendering::Renderers io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard; io.ConfigFlags |= ImGuiConfigFlags_DockingEnable; - io.ConfigFlags |= ImGuiConfigFlags_ViewportsEnable; + // io.ConfigFlags |= ImGuiConfigFlags_ViewportsEnable; auto& style = ImGui::GetStyle(); style.WindowBorderSize = 0.f; @@ -57,17 +57,44 @@ namespace ZEngine::Rendering::Renderers ImGui_ImplGlfw_InitForVulkan(reinterpret_cast(current_window), false); - m_vertex_buffer_handle = Device->CreateVertexBufferSet(); - m_index_buffer_handle = Device->CreateIndexBufferSet(); + VBHandle = Device->CreateVertexBufferSet(); + IdxBHandle = Device->CreateIndexBufferSet(); - auto vb_buffer_set = Device->VertexBufferSetManager.Access(m_vertex_buffer_handle); - auto idx_buffer_set = Device->IndexBufferSetManager.Access(m_index_buffer_handle); - for (unsigned i = 0; i < Device->SwapchainImageCount; ++i) + auto vb_buffer_set = 
Device->VertexBufferSetManager.Access(VBHandle); + auto idx_buffer_set = Device->IndexBufferSetManager.Access(IdxBHandle); + for (unsigned i = 0; i < Device->SwapchainPtr->BufferredFrameCount; ++i) { vb_buffer_set->At(i)->Allocate(ZMega(5), "ImguiVertexBuffer"); idx_buffer_set->At(i)->Allocate(ZMega(5), "ImguiIndexBuffer"); } + /* + * Font uploading + */ + AsyncResourceLoader::DeferralUpload deferral = {.Buffer = nullptr}; + deferral.Type = AsyncResourceLoader::UploadType::TEXTURE_BUFFER; + + unsigned char* pixels; + int width, height; + io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); + size_t upload_size = width * height * 4 * sizeof(uint8_t); + + Specifications::TextureSpecification font_tex_spec = {}; + font_tex_spec.Width = width; + font_tex_spec.Height = height; + font_tex_spec.Format = Specifications::ImageFormat::R8G8B8A8_UNORM; + + auto font_tex_handle = Device->CreateTexture(font_tex_spec); + deferral.TexHandle = font_tex_handle; + deferral.Buffer = pixels; + + Device->AsyncResLoader->SubmitDeferral(std::move(deferral)); + + // We enqueue the tex handle so, we write the DescriptorSet at Present(...) 
+ Device->TextureHandleToUpdates.Enqueue(font_tex_handle); + + io.Fonts->TexID = (ImTextureID) font_tex_handle.Index; + auto pass_builder = RenderGraph->RenderPassBuilder; pass_builder->SetName("Imgui Pass") .SetPipelineName("Imgui-Pipeline") @@ -95,105 +122,17 @@ namespace ZEngine::Rendering::Renderers .UseSwapchainAsRenderTarget(); - m_ui_pass = Device->CreateRenderPass(pass_builder->Detach()); - m_ui_pass->SetBindlessInput("TextureArray"); - m_ui_pass->Verify(); - m_ui_pass->Bake(); - /* - * Font uploading - */ - unsigned char* pixels; - int width, height; - io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); - size_t upload_size = width * height * 4 * sizeof(uint8_t); - - Specifications::TextureSpecification font_tex_spec = {}; - font_tex_spec.Width = width; - font_tex_spec.Height = height; - font_tex_spec.Format = Specifications::ImageFormat::R8G8B8A8_UNORM; - - auto font_tex_handle = Device->CreateTexture(font_tex_spec); - auto font_tex_res = Device->GlobalTextures.Access(font_tex_handle); - auto img_buf = Device->Image2DBufferManager.Access(font_tex_res->BufferHandle); - auto image_buf_handle = img_buf->GetHandle(); - - auto command_buf = Device->GetInstantCommandBuffer(QueueType::GRAPHIC_QUEUE); - - Specifications::ImageMemoryBarrierSpecification barrier_spec_0 = {}; - barrier_spec_0.ImageHandle = image_buf_handle; - barrier_spec_0.OldLayout = img_buf->Layout; - barrier_spec_0.NewLayout = Specifications::ImageLayout::TRANSFER_DST_OPTIMAL; - barrier_spec_0.ImageAspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier_spec_0.SourceAccessMask = VK_ACCESS_NONE; - barrier_spec_0.DestinationAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier_spec_0.SourceStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - barrier_spec_0.DestinationStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - barrier_spec_0.LayerCount = font_tex_res->Specification.LayerCount; - Primitives::ImageMemoryBarrier barrier_0{barrier_spec_0}; - command_buf->TransitionImageLayout(barrier_0); - - 
img_buf->Layout = barrier_spec_0.NewLayout; - - Device->WriteTextureData(command_buf, font_tex_handle, pixels); - - Specifications::ImageMemoryBarrierSpecification barrier_spec_1 = {}; - barrier_spec_1.ImageHandle = image_buf_handle; - barrier_spec_1.OldLayout = img_buf->Layout; - barrier_spec_1.NewLayout = Specifications::ImageLayout::SHADER_READ_ONLY_OPTIMAL; - barrier_spec_1.ImageAspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier_spec_1.SourceAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier_spec_1.DestinationAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier_spec_1.SourceStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - barrier_spec_1.DestinationStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - barrier_spec_1.LayerCount = font_tex_res->Specification.LayerCount; - Primitives::ImageMemoryBarrier barrier_1{barrier_spec_1}; - command_buf->TransitionImageLayout(barrier_1); - - img_buf->Layout = barrier_spec_1.NewLayout; - Device->EnqueueInstantCommandBuffer(command_buf); - - /* - * Dummy Texture - */ - auto dummy_tex_handle = Device->CreateTexture(1, 1, 255, 255, 255, 255); - auto dummy_tex_res = Device->GlobalTextures.Access(dummy_tex_handle); - auto dummy_tex_buf = Device->Image2DBufferManager.Access(dummy_tex_res->BufferHandle); - - io.Fonts->TexID = (ImTextureID) font_tex_handle.Index; - - auto font_image_info = img_buf->GetDescriptorImageInfo(); - auto dummy_image_info = dummy_tex_buf->GetDescriptorImageInfo(); - uint32_t frame_count = Device->SwapchainImageCount; - auto shader = m_ui_pass->Pipeline->Shader; - auto& descriptor_set_map = shader->DescriptorSetMap; - - auto scratch = ZGetScratch(Device->Arena); - Array write_descriptor_sets = {}; - write_descriptor_sets.init(scratch.Arena, frame_count); - - for (unsigned i = 0; i < frame_count; ++i) - { - for (const auto& [set, arr] : descriptor_set_map) - { - auto frame_set = arr[i]; - if (set == 0) // __unused - { - write_descriptor_sets.push(VkWriteDescriptorSet{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 
.pNext = nullptr, .dstSet = frame_set, .dstBinding = 0, .dstArrayElement = 0u /*(uint32_t) dummy_tex_handle.Index*/, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .pImageInfo = &(dummy_image_info), .pBufferInfo = nullptr, .pTexelBufferView = nullptr}); - continue; - } - - write_descriptor_sets.push(VkWriteDescriptorSet{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .pNext = nullptr, .dstSet = frame_set, .dstBinding = 0, .dstArrayElement = (uint32_t) font_tex_handle.Index, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .pImageInfo = &(font_image_info), .pBufferInfo = nullptr, .pTexelBufferView = nullptr}); - } - } - - vkUpdateDescriptorSets(Device->LogicalDevice, write_descriptor_sets.size(), write_descriptor_sets.data(), 0, nullptr); - - ZReleaseScratch(scratch); + UIPass = Device->CreateRenderPass(pass_builder->Detach()); + UIPass->SetBindlessInput("TextureArray"); + UIPass->SetInput("_unused", Device->GlobalLinearWrapSamplerImageInfo); + UIPass->SetInput("LinearWrapSampler", Device->GlobalLinearWrapSamplerImageInfo); + UIPass->Verify(); + UIPass->Bake(); } void ImGUIRenderer::Deinitialize() { - m_ui_pass->Dispose(); + UIPass->Dispose(); ImGui_ImplGlfw_Shutdown(); ImGui::DestroyContext(); @@ -245,10 +184,20 @@ namespace ZEngine::Rendering::Renderers ImGui::NewFrame(); ImGuizmo::BeginFrame(); } - - void ImGUIRenderer::DrawFrame(Hardwares::CommandBufferPtr const command_buffer) + void ImGUIRenderer::EndFrame() { + // The render method has EndFrame() ImGui::Render(); + ImGuiIO& io = ImGui::GetIO(); + if (io.ConfigFlags & ImGuiConfigFlags_ViewportsEnable) + { + ImGui::UpdatePlatformWindows(); + ImGui::RenderPlatformWindowsDefault(); + } + } + + void ImGUIRenderer::PreparePayload(RenderOverlayPayload& r_payload) + { ImDrawData* draw_data = ImGui::GetDrawData(); if (!draw_data) @@ -272,15 +221,21 @@ namespace ZEngine::Rendering::Renderers return; } - auto scratch = ZGetScratch(Device->Arena); + 
r_payload.VertexCount = vertex_count; + r_payload.IndexCount = index_count; + r_payload.IsIndexBufferUint16 = sizeof(ImDrawIdx) == 2; + r_payload.VBHandle = VBHandle; + r_payload.IdxBHandle = IdxBHandle; - Array vertex_data = {}; - Array index_data = {}; + r_payload.VertexData.clear(); + r_payload.IndexData.clear(); + r_payload.VertexData.shrink_to_fit(); + r_payload.IndexData.shrink_to_fit(); - vertex_data.init(scratch.Arena, vertex_count, vertex_count); - index_data.init(scratch.Arena, index_count, index_count); + r_payload.VertexData.resize(vertex_count); + r_payload.IndexData.resize(index_count); - ImDrawVert* vertex_data_ptr = vertex_data.data(); + UIDrawVert* vertex_data_ptr = r_payload.VertexData.data(); for (int n = 0; n < draw_data->CmdListsCount; ++n) { const ImDrawList* cmd_list = draw_data->CmdLists[n]; @@ -289,7 +244,7 @@ namespace ZEngine::Rendering::Renderers vertex_data_ptr += cmd_list->VtxBuffer.Size; } - ImDrawIdx* index_data_ptr = index_data.data(); + unsigned short* index_data_ptr = r_payload.IndexData.data(); for (int n = 0; n < draw_data->CmdListsCount; ++n) { const ImDrawList* cmd_list = draw_data->CmdLists[n]; @@ -298,35 +253,14 @@ namespace ZEngine::Rendering::Renderers index_data_ptr += cmd_list->IdxBuffer.Size; } - auto vtx_data_view = ArrayView{vertex_data}; - auto idx_data_view = ArrayView{index_data}; - - auto vertex_buffer_set = Device->VertexBufferSetManager.Access(m_vertex_buffer_handle); - auto index_buffer_set = Device->IndexBufferSetManager.Access(m_index_buffer_handle); - - auto vertex_buffer = vertex_buffer_set->At(Device->CurrentFrameIndex); - auto index_buffer = index_buffer_set->At(Device->CurrentFrameIndex); - - vertex_buffer->Write(vtx_data_view); - index_buffer->Write(idx_data_view); - - ZReleaseScratch(scratch); - - auto current_framebuffer = Device->SwapchainFramebuffers[Device->SwapchainImageIndex]; - - command_buffer->BeginRenderPass(m_ui_pass, current_framebuffer); - command_buffer->BindVertexBuffer(*vertex_buffer); 
- command_buffer->BindIndexBuffer(*index_buffer, sizeof(ImDrawIdx) == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32); - // Setup scale and translation: // Our visible imgui space lies from draw_data->DisplayPps (top left) to // draw_data->DisplayPos+data_data->DisplaySize (bottom right). DisplayPos is (0,0) for single viewport apps. - PushConstantData pc_data = {}; - pc_data.Scale[0] = 2.0f / draw_data->DisplaySize.x; - pc_data.Scale[1] = 2.0f / draw_data->DisplaySize.y; - pc_data.Translate[0] = -1.0f - draw_data->DisplayPos.x * pc_data.Scale[0]; - pc_data.Translate[1] = -1.0f - draw_data->DisplayPos.y * pc_data.Scale[1]; + r_payload.Pc[0] = 2.0f / draw_data->DisplaySize.x; + r_payload.Pc[1] = 2.0f / draw_data->DisplaySize.y; + r_payload.Pc[2] = -1.0f - draw_data->DisplayPos.x * r_payload.Pc[0]; + r_payload.Pc[3] = -1.0f - draw_data->DisplayPos.y * r_payload.Pc[1]; // Will project scissor/clipping rectangles into framebuffer space ImVec2 clip_off = draw_data->DisplayPos; // (0,0) unless using multi-viewports @@ -334,6 +268,7 @@ namespace ZEngine::Rendering::Renderers // Render command lists // (Because we merged all buffers into a single one, we maintain our own offset into them) + int global_vtx_offset = 0; int global_idx_offset = 0; for (int n = 0; n < draw_data->CmdListsCount; n++) @@ -359,29 +294,21 @@ namespace ZEngine::Rendering::Renderers { // Apply scissor/clipping rectangle VkRect2D scissor; - scissor.offset.x = (int32_t) (clip_rect.x); - scissor.offset.y = (int32_t) (clip_rect.y); - scissor.extent.width = (uint32_t) (clip_rect.z - clip_rect.x); - scissor.extent.height = (uint32_t) (clip_rect.w - clip_rect.y); - command_buffer->SetScissor(scissor); - - pc_data.TextureId = (uint32_t) (intptr_t) pcmd->TextureId; - command_buffer->PushConstants(VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(PushConstantData), &pc_data); - command_buffer->BindDescriptorSets(Device->CurrentFrameIndex); - command_buffer->DrawIndexed(pcmd->ElemCount, 1, pcmd->IdxOffset + 
global_idx_offset, pcmd->VtxOffset + global_vtx_offset, 0); + scissor.offset.x = (int32_t) (clip_rect.x); + scissor.offset.y = (int32_t) (clip_rect.y); + scissor.extent.width = (uint32_t) (clip_rect.z - clip_rect.x); + scissor.extent.height = (uint32_t) (clip_rect.w - clip_rect.y); + + r_payload.TextureIds[r_payload.DrawDataIndex] = (uint32_t) (intptr_t) pcmd->TextureId; + r_payload.ScissorCmds[r_payload.DrawDataIndex] = ScissorCmd{scissor.extent.width, scissor.extent.height, scissor.offset.x, scissor.offset.y}; + r_payload.IndexedCmds[r_payload.DrawDataIndex] = IndexedCmd{pcmd->ElemCount, 1, pcmd->IdxOffset + global_idx_offset, (int32_t) (pcmd->VtxOffset + global_vtx_offset), 0}; + + r_payload.DrawDataIndex++; } } } global_idx_offset += cmd_list->IdxBuffer.Size; global_vtx_offset += cmd_list->VtxBuffer.Size; } - command_buffer->EndRenderPass(); - - ImGuiIO& io = ImGui::GetIO(); - if (io.ConfigFlags & ImGuiConfigFlags_ViewportsEnable) - { - ImGui::UpdatePlatformWindows(); - ImGui::RenderPlatformWindowsDefault(); - } } } // namespace ZEngine::Rendering::Renderers diff --git a/ZEngine/ZEngine/Rendering/Renderers/ImGUIRenderer.h b/ZEngine/ZEngine/Rendering/Renderers/ImGUIRenderer.h index 0909111d..b0437e6e 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/ImGUIRenderer.h +++ b/ZEngine/ZEngine/Rendering/Renderers/ImGUIRenderer.h @@ -10,22 +10,24 @@ namespace ZEngine::Rendering::Renderers float Scale[2] = {0}; float Translate[2] = {0}; uint32_t TextureId = 0xFFFFFFFFu; + uint32_t padding = 0xFFFFFFFFu; }; struct ImGUIRenderer : public IRenderer { - void Initialize(Hardwares::VulkanDevicePtr device) override; - void Deinitialize() override; - void StyleDarkTheme(); + RenderPasses::RenderPass* UIPass = nullptr; + Hardwares::VertexBufferSetHandle VBHandle = {}; + Hardwares::IndexBufferSetHandle IdxBHandle = {}; - void NewFrame(); - void DrawFrame(Hardwares::CommandBuffer* const command_buffer); + void Initialize(Hardwares::VulkanDevicePtr device) override; + void 
Deinitialize() override; - private: - Hardwares::VertexBufferSetHandle m_vertex_buffer_handle; - Hardwares::IndexBufferSetHandle m_index_buffer_handle; - RenderPasses::RenderPass* m_ui_pass; + void StyleDarkTheme(); + + void NewFrame(); + void EndFrame(); + void PreparePayload(RenderOverlayPayload& payload); }; ZDEFINE_PTR(ImGUIRenderer); diff --git a/ZEngine/ZEngine/Rendering/Renderers/RenderGraph.cpp b/ZEngine/ZEngine/Rendering/Renderers/RenderGraph.cpp index ca1c1cd1..8b10ed64 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/RenderGraph.cpp +++ b/ZEngine/ZEngine/Rendering/Renderers/RenderGraph.cpp @@ -271,7 +271,7 @@ namespace ZEngine::Rendering::Renderers } } - node.CallbackPass->Execute(Device, SceneData, node.Handle, node.Framebuffer, command_buffer); + node.CallbackPass->Execute(Device, ResourceInspector, SceneData, node.Handle, node.Framebuffer, command_buffer); } } @@ -430,7 +430,7 @@ namespace ZEngine::Rendering::Renderers { Graph->ResourceMap[name].Name = name; Graph->ResourceMap[name].Type = RenderGraphResourceType::TEXTURE; - Graph->ResourceMap[name].ResourceInfo.TextureHandle = Graph->Device->AsyncResLoader->LoadTextureFile(filename); + Graph->ResourceMap[name].ResourceInfo.TextureHandle = Graph->Device->AsyncResLoader->Submit(0, 1 /* 1 : just for testing*/, {.TextureUpload = {.Filename = filename}}); return Graph->ResourceMap[name]; } diff --git a/ZEngine/ZEngine/Rendering/Renderers/RenderGraph.h b/ZEngine/ZEngine/Rendering/Renderers/RenderGraph.h index e6bba5d8..8697ff5f 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/RenderGraph.h +++ b/ZEngine/ZEngine/Rendering/Renderers/RenderGraph.h @@ -81,9 +81,9 @@ namespace ZEngine::Rendering::Renderers struct IRenderGraphCallbackPass { - virtual void Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) = 0; - virtual void Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr 
const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) = 0; - virtual void Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) = 0; + virtual void Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) = 0; + virtual void Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) = 0; + virtual void Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) = 0; }; struct RenderGraphNode diff --git a/ZEngine/ZEngine/Rendering/Renderers/RenderPasses/RenderPass.cpp b/ZEngine/ZEngine/Rendering/Renderers/RenderPasses/RenderPass.cpp index 211cb141..724ca679 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/RenderPasses/RenderPass.cpp +++ b/ZEngine/ZEngine/Rendering/Renderers/RenderPasses/RenderPass.cpp @@ -22,7 +22,7 @@ namespace ZEngine::Rendering::Renderers::RenderPasses if (Specification.SwapchainAsRenderTarget) { - Specification.PipelineSpecification.Attachment = m_device->SwapchainAttachment; // Todo : Can potential Dispose() issue + Specification.PipelineSpecification.Attachment = m_device->SwapchainPtr->SwapchainAttachment; // Todo : Can potential Dispose() issue Pipeline = ZPushStructCtorArgs(m_device->Arena, Pipelines::GraphicPipeline); Pipeline->Initialize(m_device, 
std::move(Specification.PipelineSpecification)); } @@ -147,7 +147,7 @@ namespace ZEngine::Rendering::Renderers::RenderPasses const auto& spec = validity_output.second; auto shader = Pipeline->Shader; const auto& descriptor_set_map = shader->DescriptorSetMap; - auto frame_count = m_device->SwapchainImageCount; + auto frame_count = m_device->SwapchainPtr->BufferredFrameCount; auto ubo_buf = m_device->UniformBufferSetManager.Access(handle); auto write_reqs = std::vector(frame_count); @@ -178,7 +178,7 @@ namespace ZEngine::Rendering::Renderers::RenderPasses const auto& spec = validity_output.second; auto shader = Pipeline->Shader; const auto& descriptor_set_map = shader->DescriptorSetMap; - auto frame_count = m_device->SwapchainImageCount; + auto frame_count = m_device->SwapchainPtr->BufferredFrameCount; auto sbo_buf = m_device->StorageBufferSetManager.Access(handle); auto write_reqs = std::vector(frame_count); @@ -210,7 +210,7 @@ namespace ZEngine::Rendering::Renderers::RenderPasses auto shader = Pipeline->Shader; const auto& descriptor_set_map = shader->DescriptorSetMap; - auto frame_count = m_device->SwapchainImageCount; + auto frame_count = m_device->SwapchainPtr->BufferredFrameCount; auto tex_buf = m_device->GlobalTextures.Access(handle); auto img_buf = m_device->Image2DBufferManager.Access(tex_buf->BufferHandle); auto write_reqs = std::vector(frame_count); @@ -220,13 +220,40 @@ namespace ZEngine::Rendering::Renderers::RenderPasses auto set = descriptor_set_map.at(spec.Set)[i]; auto& image_info = img_buf->GetDescriptorImageInfo(); - write_reqs[i] = VkWriteDescriptorSet{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .pNext = nullptr, .dstSet = set, .dstBinding = spec.Binding, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .pImageInfo = &(image_info), .pBufferInfo = nullptr, .pTexelBufferView = nullptr}; + write_reqs[i] = VkWriteDescriptorSet{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .pNext = 
nullptr, .dstSet = set, .dstBinding = spec.Binding, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, .pImageInfo = &(image_info), .pBufferInfo = nullptr, .pTexelBufferView = nullptr}; } vkUpdateDescriptorSets(m_device->LogicalDevice, write_reqs.size(), write_reqs.data(), 0, nullptr); Inputs.insert(key_name.data()); } + void RenderPass::SetInput(cstring key_name, const VkDescriptorImageInfo& sampler_info) + { + auto validity_output = ValidateInput(key_name); + if (!validity_output.first) + { + return; + } + + const auto& spec = validity_output.second; + + auto shader = Pipeline->Shader; + const auto& descriptor_set_map = shader->DescriptorSetMap; + auto frame_count = m_device->SwapchainPtr->BufferredFrameCount; + + auto write_reqs = std::vector(frame_count); + + for (unsigned i = 0; i < frame_count; ++i) + { + auto set = descriptor_set_map.at(spec.Set)[i]; + + write_reqs[i] = VkWriteDescriptorSet{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .pNext = nullptr, .dstSet = set, .dstBinding = spec.Binding, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, .pImageInfo = &(sampler_info), .pBufferInfo = nullptr, .pTexelBufferView = nullptr}; + } + vkUpdateDescriptorSets(m_device->LogicalDevice, write_reqs.size(), write_reqs.data(), 0, nullptr); + + Inputs.insert(key_name); + } + void RenderPass::SetBindlessInput(std::string_view key_name) { auto validity_output = ValidateInput(key_name); @@ -238,7 +265,7 @@ namespace ZEngine::Rendering::Renderers::RenderPasses auto shader = Pipeline->Shader; auto descriptor_set_map = shader->DescriptorSetMap; - auto frame_count = m_device->SwapchainImageCount; + auto frame_count = m_device->SwapchainPtr->BufferredFrameCount; for (unsigned i = 0; i < frame_count; ++i) { @@ -321,17 +348,17 @@ namespace ZEngine::Rendering::Renderers::RenderPasses ZRawPtr(Renderers::RenderPasses::Attachment) RenderPass::GetAttachment() const { - return 
Specification.SwapchainAsRenderTarget ? m_device->SwapchainAttachment : Attachment; + return Specification.SwapchainAsRenderTarget ? m_device->SwapchainPtr->SwapchainAttachment : Attachment; } uint32_t RenderPass::GetRenderAreaWidth() const { - return Specification.SwapchainAsRenderTarget ? m_device->SwapchainImageWidth : RenderAreaWidth; + return Specification.SwapchainAsRenderTarget ? m_device->SwapchainPtr->SwapchainImageWidth : RenderAreaWidth; } uint32_t RenderPass::GetRenderAreaHeight() const { - return Specification.SwapchainAsRenderTarget ? m_device->SwapchainImageHeight : RenderAreaHeight; + return Specification.SwapchainAsRenderTarget ? m_device->SwapchainPtr->SwapchainImageHeight : RenderAreaHeight; } std::pair RenderPass::ValidateInput(std::string_view key) diff --git a/ZEngine/ZEngine/Rendering/Renderers/RenderPasses/RenderPass.h b/ZEngine/ZEngine/Rendering/Renderers/RenderPasses/RenderPass.h index 0966d670..643d152c 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/RenderPasses/RenderPass.h +++ b/ZEngine/ZEngine/Rendering/Renderers/RenderPasses/RenderPass.h @@ -42,6 +42,8 @@ namespace ZEngine::Rendering::Renderers::RenderPasses void SetInput(std::string_view key_name, const Hardwares::StorageBufferSetHandle& buffer); void SetInput(std::string_view key_name, const Textures::TextureHandle& texture); void SetBindlessInput(std::string_view key_name); + // Todo : This is a temporary solution, we should have a more abstract sampler resource in the future + void SetInput(cstring key_name, const VkDescriptorImageInfo& sampler_info); void UpdateInputBinding(); ZRawPtr(Renderers::RenderPasses::Attachment) GetAttachment() const; void UpdateRenderTargets(); @@ -93,4 +95,4 @@ namespace ZEngine::Rendering::Renderers::RenderPasses private: Specifications::RenderPassSpecification m_spec{}; }; -} // namespace ZEngine::Rendering::Renderers::RenderPasses \ No newline at end of file +} // namespace ZEngine::Rendering::Renderers::RenderPasses diff --git 
a/ZEngine/ZEngine/Rendering/Renderers/RendererPasses.cpp b/ZEngine/ZEngine/Rendering/Renderers/RendererPasses.cpp index ab28a645..c96e4771 100644 --- a/ZEngine/ZEngine/Rendering/Renderers/RendererPasses.cpp +++ b/ZEngine/ZEngine/Rendering/Renderers/RendererPasses.cpp @@ -7,21 +7,43 @@ using namespace ZEngine::Core::Containers; namespace ZEngine::Rendering::Renderers { - void InitialPass::Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) + void UploadPass::Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) { - VertexData.init(device->Arena, 3, make_initializer_list(device->Arena, 0.0f, 0.0f, 0.0f)); + WriteOnceControl.init(device->Arena, device->SwapchainPtr->BufferredFrameCount, device->SwapchainPtr->BufferredFrameCount); + + SkyboxVertexData.init(device->Arena, 24, make_initializer_list(device->Arena, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, -1.0f)); + SkyboxIndexData.init(device->Arena, 36, make_initializer_list(device->Arena, 0, 1, 2, 2, 3, 0, 1, 5, 6, 6, 2, 1, 5, 4, 7, 7, 6, 5, 4, 0, 3, 3, 7, 4, 3, 2, 6, 6, 7, 3, 4, 5, 1, 1, 0, 4)); + GridVertexData.init(device->Arena, 12, make_initializer_list(device->Arena, -1.0f, 0.0f, -1.0f, 1.0f, 0.0f, -1.0f, 1.0f, 0.0f, 1.0f, -1.0f, 0.0f, 1.0f)); + GridIndexData.init(device->Arena, 6, make_initializer_list(device->Arena, 0, 1, 2, 2, 3, 0)); + + const auto& skybox_res_vb_info = res_builder->CreateBufferSet("SkyboxVbSet", BufferSetCreationType::VERTEX); + const auto& skybox_res_ib_info = res_builder->CreateBufferSet("SkyboxIbSet", BufferSetCreationType::INDEX); + const auto& grid_res_vb_info = res_builder->CreateBufferSet("GridVbSet", BufferSetCreationType::VERTEX); + const auto& grid_res_ib_info = 
res_builder->CreateBufferSet("GridIbSet", BufferSetCreationType::INDEX); - auto& vb_res = res_builder->CreateBufferSet("initial_vertex_buffer", BufferSetCreationType::VERTEX); - VBHandle = vb_res.ResourceInfo.VertexBufferSetHandle; + SkyboxVBHandle = skybox_res_vb_info.ResourceInfo.VertexBufferSetHandle; + SkyboxIBHandle = skybox_res_ib_info.ResourceInfo.IndexBufferSetHandle; + GridVBHandle = grid_res_vb_info.ResourceInfo.VertexBufferSetHandle; + GridIBHandle = grid_res_ib_info.ResourceInfo.IndexBufferSetHandle; - auto vb_view = ArrayView{VertexData}; + auto count = device->SwapchainPtr->BufferredFrameCount; - auto buffer_set = device->VertexBufferSetManager.Access(VBHandle); - for (unsigned i = 0; i < device->SwapchainImageCount; ++i) + auto skybox_vb_buffer_set = device->VertexBufferSetManager.Access(SkyboxVBHandle); + auto skybox_ib_buffer_set = device->IndexBufferSetManager.Access(SkyboxIBHandle); + auto grid_vb_buffer_set = device->VertexBufferSetManager.Access(GridVBHandle); + auto grid_ib_buffer_set = device->IndexBufferSetManager.Access(GridIBHandle); + + for (int i = 0; i < count; ++i) { - auto buffer = buffer_set->At(i); - buffer->Allocate(vb_view.size_bytes(), "initial_vertex_buffer"); - buffer->Write(vb_view); + auto skybox_vb_view = ArrayView{SkyboxVertexData}; + auto skybox_ib_view = ArrayView{SkyboxIndexData}; + auto grid_vb_view = ArrayView{GridVertexData}; + auto grid_ib_view = ArrayView{GridIndexData}; + + skybox_vb_buffer_set->At(i)->Allocate(skybox_vb_view.size_bytes(), "SkyboxVb"); + skybox_ib_buffer_set->At(i)->Allocate(skybox_ib_view.size_bytes(), "SkyboxIb"); + grid_vb_buffer_set->At(i)->Allocate(grid_vb_view.size_bytes(), "GridVb"); + grid_ib_buffer_set->At(i)->Allocate(grid_ib_view.size_bytes(), "GridIb"); } RenderGraphRenderPassCreation pass_node = {.Name = name}; @@ -34,6 +56,79 @@ namespace ZEngine::Rendering::Renderers res_builder->CreateRenderPassNode(pass_node); } + void UploadPass::Compile(Hardwares::VulkanDevicePtr const device, 
Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) + { + // this compile action is purely fake, as this pass is only used to upload data to the buffers, + // and doesn't actually need a render pass. However, we need to create a dummy render pass to be able to execute this pass in the render graph. + if (output_pass && !(*output_pass)) + { + auto pass_spec = pass_builder->SetPipelineName("Initial-Pipeline") + .SetInputBindingCount(1) + .SetStride(0, sizeof(float) * 3) + .SetRate(0, VK_VERTEX_INPUT_RATE_VERTEX) + .SetInputAttributeCount(1) + .SetLocation(0, 0) + + .SetBinding(0, 0) + .SetFormat(0, Specifications::ImageFormat::R32G32B32_SFLOAT) + .SetOffset(0, 0) + + .EnablePipelineDepthTest(true) + .UseShader("initial") + .Detach(); + *output_pass = device->CreateRenderPass(pass_spec); + (*output_pass)->Bake(); + } + } + + void UploadPass::Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) + { + auto index = device->SwapchainPtr->CurrentFrame->Index; + if (WriteOnceControl[device->SwapchainPtr->CurrentFrame->Index] != 0) + { + return; + } + + auto skybox_vb_buffer_set = device->VertexBufferSetManager.Access(SkyboxVBHandle); + auto skybox_ib_buffer_set = device->IndexBufferSetManager.Access(SkyboxIBHandle); + auto grid_vb_buffer_set = device->VertexBufferSetManager.Access(GridVBHandle); + auto grid_ib_buffer_set = device->IndexBufferSetManager.Access(GridIBHandle); + + skybox_vb_buffer_set->At(index)->Write(index, 0, ArrayView{SkyboxVertexData}); + skybox_ib_buffer_set->At(index)->Write(index, 0, ArrayView{SkyboxIndexData}); + grid_vb_buffer_set->At(index)->Write(index, 0, ArrayView{GridVertexData}); + 
grid_ib_buffer_set->At(index)->Write(index, 0, ArrayView{GridIndexData}); + + WriteOnceControl[device->SwapchainPtr->CurrentFrame->Index] = 1; + } + + void InitialPass::Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) + { + // VertexData.init(device->Arena, 3, make_initializer_list(device->Arena, 0.0f, 0.0f, 0.0f)); + + // auto& vb_res = res_builder->CreateBufferSet("initial_vertex_buffer", BufferSetCreationType::VERTEX); + // VBHandle = vb_res.ResourceInfo.VertexBufferSetHandle; + + // auto vb_view = ArrayView{VertexData}; + + // auto buffer_set = device->VertexBufferSetManager.Access(VBHandle); + // for (unsigned i = 0; i < device->SwapchainPtr->BufferredFrameCount; ++i) + //{ + // auto buffer = buffer_set->At(i); + // buffer->Allocate(vb_view.size_bytes(), "initial_vertex_buffer"); + // buffer->Write(i, 0, vb_view); + // } + + RenderGraphRenderPassCreation pass_node = {.Name = name}; + + pass_node.Inputs.init(device->Arena, 2); + pass_node.Outputs.init(device->Arena, 1); + pass_node.Inputs.push(RenderGraphRenderPassInputOutputInfo{.Name = RendererResourceName::FrameDepthRenderTargetName}); + pass_node.Inputs.push(RenderGraphRenderPassInputOutputInfo{.Name = RendererResourceName::FrameColorRenderTargetName}); + + res_builder->CreateRenderPassNode(pass_node); + } + void InitialPass::Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) { if (output_pass && !(*output_pass)) @@ -57,14 +152,21 @@ namespace ZEngine::Rendering::Renderers } } - void InitialPass::Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) + 
void InitialPass::Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) { auto buffer_set = device->VertexBufferSetManager.Access(VBHandle); - auto vertex_buffer = buffer_set->At(device->CurrentFrameIndex); + auto vertex_buffer = buffer_set->At(device->SwapchainPtr->CurrentFrame->Index); - command_buffer->BeginRenderPass(pass, framebuffer->Handle); + command_buffer->BeginRenderPass(pass, framebuffer->Handle, false); + { + uint32_t w = pass->GetRenderAreaWidth(); + uint32_t h = pass->GetRenderAreaHeight(); + command_buffer->SetViewport(w, h); + command_buffer->SetScissor(w, h); + } + command_buffer->BindPipeline(Specifications::PipelineBindPoint::GRAPHIC, pass->Pipeline); command_buffer->BindVertexBuffer(*vertex_buffer); - command_buffer->BindDescriptorSets(device->CurrentFrameIndex); + command_buffer->BindDescriptorSets(device->SwapchainPtr->CurrentFrame->Index); command_buffer->Draw(1, 1, 0, 0); command_buffer->EndRenderPass(); } @@ -108,7 +210,7 @@ namespace ZEngine::Rendering::Renderers } } - void DepthPrePass::Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) + void DepthPrePass::Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) { if (!scene || !scene->IndirectBufferHandle) { @@ -116,22 +218,45 @@ namespace ZEngine::Rendering::Renderers } auto indirect_buffer = device->IndirectBufferSetManager.Access(scene->IndirectBufferHandle); - 
command_buffer->BeginRenderPass(pass, framebuffer->Handle); - command_buffer->BindDescriptorSets(device->CurrentFrameIndex); - command_buffer->DrawIndirect(*indirect_buffer->At(device->CurrentFrameIndex)); + command_buffer->BeginRenderPass(pass, framebuffer->Handle, false); + { + uint32_t w = pass->GetRenderAreaWidth(); + uint32_t h = pass->GetRenderAreaHeight(); + command_buffer->SetViewport(w, h); + command_buffer->SetScissor(w, h); + } + command_buffer->BindPipeline(Specifications::PipelineBindPoint::GRAPHIC, pass->Pipeline); + command_buffer->BindDescriptorSets(device->SwapchainPtr->CurrentFrame->Index); + command_buffer->DrawIndirect(*indirect_buffer->At(device->SwapchainPtr->CurrentFrame->Index)); command_buffer->EndRenderPass(); } void SkyboxPass::Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) { - m_index_data.init(device->Arena, 36, make_initializer_list(device->Arena, 0, 1, 2, 2, 3, 0, 1, 5, 6, 6, 2, 1, 5, 4, 7, 7, 6, 5, 4, 0, 3, 3, 7, 4, 3, 2, 6, 6, 7, 3, 4, 5, 1, 1, 0, 4)); - m_vertex_data.init(device->Arena, 24, make_initializer_list(device->Arena, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, -1.0f)); - auto env_map_res = res_builder->CreateTexture("skybox_env_map", "Settings/EnvironmentMaps/bergen_4k.hdr"); m_env_map = env_map_res.ResourceInfo.TextureHandle; - m_vb_handle = device->CreateVertexBufferSet(); - m_ib_handle = device->CreateIndexBufferSet(); + // m_vb_handle = device->CreateVertexBufferSet(); + // m_ib_handle = device->CreateIndexBufferSet(); + + // auto count = device->SwapchainPtr->BufferredFrameCount; + // auto vtx_buffer_set = device->VertexBufferSetManager.Access(m_vb_handle); + // auto idx_buffer_set = device->IndexBufferSetManager.Access(m_ib_handle); + + // auto vtx_buf_view = ArrayView{m_vertex_data}; + // auto 
idx_buf_view = ArrayView{m_index_data}; + + // for (int i = 0; i < count; ++i) + //{ + // auto vertex_buffer = vtx_buffer_set->At(i); + // auto index_buffer = idx_buffer_set->At(i); + + // vertex_buffer->Allocate(vtx_buf_view.size_bytes(), "SkyboxPassVtx"); + // index_buffer->Allocate(idx_buf_view.size_bytes(), "SkyboxPassIdx"); + + // vertex_buffer->Write(i, 0, vtx_buf_view); + // index_buffer->Write(i, 0, idx_buf_view); + //} auto& output_skybox = res_builder->CreateRenderTarget("skybox_render_target", {.Width = 1280, .Height = 780, .Format = ImageFormat::R8G8B8A8_UNORM}); RenderGraphRenderPassCreation pass_node = {.Name = name}; @@ -177,52 +302,63 @@ namespace ZEngine::Rendering::Renderers { (*output_pass)->SetInput("UBCamera", scene->SceneCameraBufferHandle); (*output_pass)->SetInput("EnvMap", m_env_map); + (*output_pass)->SetInput("LinearWrapSampler", device->GlobalLinearWrapSamplerImageInfo); } (*output_pass)->Verify(); } - void SkyboxPass::Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) + void SkyboxPass::Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) { - auto vertex_buffer = device->VertexBufferSetManager.Access(m_vb_handle); - auto index_buffer = device->IndexBufferSetManager.Access(m_ib_handle); + const auto& vb_handle = res_inspector->GetVertexBufferSet("SkyboxVbSet"); + const auto& ib_handle = res_inspector->GetIndexBufferSet("SkyboxIbSet"); + + auto vertex_buffer = device->VertexBufferSetManager.Access(vb_handle); + auto index_buffer = device->IndexBufferSetManager.Access(ib_handle); - command_buffer->BeginRenderPass(pass, framebuffer->Handle); - 
command_buffer->BindVertexBuffer(*vertex_buffer->At(device->CurrentFrameIndex)); - command_buffer->BindIndexBuffer(*index_buffer->At(device->CurrentFrameIndex), VK_INDEX_TYPE_UINT16); - command_buffer->BindDescriptorSets(device->CurrentFrameIndex); + command_buffer->BeginRenderPass(pass, framebuffer->Handle, false); + { + uint32_t w = pass->GetRenderAreaWidth(); + uint32_t h = pass->GetRenderAreaHeight(); + command_buffer->SetViewport(w, h); + command_buffer->SetScissor(w, h); + } + command_buffer->BindPipeline(Specifications::PipelineBindPoint::GRAPHIC, pass->Pipeline); + command_buffer->BindVertexBuffer(*vertex_buffer->At(device->SwapchainPtr->CurrentFrame->Index)); + command_buffer->BindIndexBuffer(*index_buffer->At(device->SwapchainPtr->CurrentFrame->Index), VK_INDEX_TYPE_UINT16); + command_buffer->BindDescriptorSets(device->SwapchainPtr->CurrentFrame->Index); command_buffer->DrawIndexed(36, 1, 0, 0, 0); command_buffer->EndRenderPass(); } void GridPass::Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) { - auto arena = device->Arena; + auto arena = device->Arena; - m_index_data.init(arena, 6, make_initializer_list(arena, 0, 1, 2, 2, 3, 0)); - m_vertex_data.init(arena, 12, make_initializer_list(arena, -1.0f, 0.0f, -1.0f, 1.0f, 0.0f, -1.0f, 1.0f, 0.0f, 1.0f, -1.0f, 0.0f, 1.0f)); + // m_index_data.init(arena, 6, make_initializer_list(arena, 0, 1, 2, 2, 3, 0)); + // m_vertex_data.init(arena, 12, make_initializer_list(arena, -1.0f, 0.0f, -1.0f, 1.0f, 0.0f, -1.0f, 1.0f, 0.0f, 1.0f, -1.0f, 0.0f, 1.0f)); - m_vb_handle = device->CreateVertexBufferSet(); - m_ib_handle = device->CreateIndexBufferSet(); + // m_vb_handle = device->CreateVertexBufferSet(); + // m_ib_handle = device->CreateIndexBufferSet(); - auto count = device->SwapchainImageCount; - auto vtx_buffer_set = device->VertexBufferSetManager.Access(m_vb_handle); - auto idx_buffer_set = 
device->IndexBufferSetManager.Access(m_ib_handle); + // auto count = device->SwapchainPtr->BufferredFrameCount; + // auto vtx_buffer_set = device->VertexBufferSetManager.Access(m_vb_handle); + // auto idx_buffer_set = device->IndexBufferSetManager.Access(m_ib_handle); - auto vtx_buf_view = ArrayView{m_vertex_data}; - auto idx_buf_view = ArrayView{m_index_data}; + // auto vtx_buf_view = ArrayView{m_vertex_data}; + // auto idx_buf_view = ArrayView{m_index_data}; - for (int i = 0; i < count; ++i) - { - auto vertex_buffer = vtx_buffer_set->At(i); - auto index_buffer = idx_buffer_set->At(i); + // for (int i = 0; i < count; ++i) + //{ + // auto vertex_buffer = vtx_buffer_set->At(i); + // auto index_buffer = idx_buffer_set->At(i); - vertex_buffer->Allocate(vtx_buf_view.size_bytes(), "GridPassVtx"); - index_buffer->Allocate(idx_buf_view.size_bytes(), "GridPassIdx"); + // vertex_buffer->Allocate(vtx_buf_view.size_bytes(), "GridPassVtx"); + // index_buffer->Allocate(idx_buf_view.size_bytes(), "GridPassIdx"); - vertex_buffer->Write(vtx_buf_view); - index_buffer->Write(idx_buf_view); - } + // vertex_buffer->Write(i, 0, vtx_buf_view); + // index_buffer->Write(i, 0, idx_buf_view); + //} auto& output_grid = res_builder->CreateRenderTarget("grid_render_target", {.Width = 1280, .Height = 780, .Format = ImageFormat::R8G8B8A8_UNORM}); RenderGraphRenderPassCreation pass_node = {.Name = name}; @@ -270,18 +406,29 @@ namespace ZEngine::Rendering::Renderers } } - void GridPass::Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) + void GridPass::Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) 
{ - auto vtx_buffer_set = device->VertexBufferSetManager.Access(m_vb_handle); - auto idx_buffer_set = device->IndexBufferSetManager.Access(m_ib_handle); - auto vertex_buffer = vtx_buffer_set->At(device->CurrentFrameIndex); - auto index_buffer = idx_buffer_set->At(device->CurrentFrameIndex); + auto vb_handle = res_inspector->GetVertexBufferSet("GridVbSet"); + auto id_handle = res_inspector->GetIndexBufferSet("GridIbSet"); + + auto vb_set = device->VertexBufferSetManager.Access(vb_handle); + auto ib_set = device->IndexBufferSetManager.Access(id_handle); - command_buffer->BeginRenderPass(pass, framebuffer->Handle); + auto vertex_buffer = vb_set->At(device->SwapchainPtr->CurrentFrame->Index); + auto index_buffer = ib_set->At(device->SwapchainPtr->CurrentFrame->Index); + + command_buffer->BeginRenderPass(pass, framebuffer->Handle, false); + { + uint32_t w = pass->GetRenderAreaWidth(); + uint32_t h = pass->GetRenderAreaHeight(); + command_buffer->SetViewport(w, h); + command_buffer->SetScissor(w, h); + } + command_buffer->BindPipeline(Specifications::PipelineBindPoint::GRAPHIC, pass->Pipeline); command_buffer->BindVertexBuffer(*vertex_buffer); command_buffer->BindIndexBuffer(*index_buffer, VK_INDEX_TYPE_UINT16); - command_buffer->BindDescriptorSets(device->CurrentFrameIndex); + command_buffer->BindDescriptorSets(device->SwapchainPtr->CurrentFrame->Index); command_buffer->DrawIndexed(6, 1, 0, 0, 0); command_buffer->EndRenderPass(); } @@ -339,15 +486,22 @@ namespace ZEngine::Rendering::Renderers } } - void GbufferPass::Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) + void GbufferPass::Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const 
framebuffer, Hardwares::CommandBufferPtr const command_buffer) { CHECK_AND_ESCAPE_NULL(scene) CHECK_AND_ESCAPE_NULL(scene->IndirectBufferHandle) auto indirect_buffer = device->IndirectBufferSetManager.Access(scene->IndirectBufferHandle); - command_buffer->BeginRenderPass(pass, framebuffer->Handle); - command_buffer->BindDescriptorSets(device->CurrentFrameIndex); - command_buffer->DrawIndirect(*indirect_buffer->At(device->CurrentFrameIndex)); + command_buffer->BeginRenderPass(pass, framebuffer->Handle, false); + { + uint32_t w = pass->GetRenderAreaWidth(); + uint32_t h = pass->GetRenderAreaHeight(); + command_buffer->SetViewport(w, h); + command_buffer->SetScissor(w, h); + } + command_buffer->BindPipeline(Specifications::PipelineBindPoint::GRAPHIC, pass->Pipeline); + command_buffer->BindDescriptorSets(device->SwapchainPtr->CurrentFrame->Index); + command_buffer->DrawIndirect(*indirect_buffer->At(device->SwapchainPtr->CurrentFrame->Index)); command_buffer->EndRenderPass(); } @@ -409,7 +563,7 @@ namespace ZEngine::Rendering::Renderers //(*pass)->Verify(); } - void LightingPass::Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) + void LightingPass::Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) { // auto directional_light_buffer_handle = graph->GetStorageBufferSet("g_scene_directional_light_buffer"); // auto point_light_buffer_handle = graph->GetStorageBufferSet("g_scene_point_light_buffer"); diff --git a/ZEngine/ZEngine/Rendering/Renderers/RendererPasses.h b/ZEngine/ZEngine/Rendering/Renderers/RendererPasses.h index 0c185305..3438674c 100644 --- 
a/ZEngine/ZEngine/Rendering/Renderers/RendererPasses.h +++ b/ZEngine/ZEngine/Rendering/Renderers/RendererPasses.h @@ -14,6 +14,25 @@ namespace ZEngine::Rendering::Renderers { + struct UploadPass : public IRenderGraphCallbackPass + { + Core::Containers::Array SkyboxVertexData = {}; + Core::Containers::Array GridVertexData = {}; + Core::Containers::Array SkyboxIndexData = {}; + Core::Containers::Array GridIndexData = {}; + + Hardwares::VertexBufferSetHandle SkyboxVBHandle = {}; + Hardwares::VertexBufferSetHandle GridVBHandle = {}; + Hardwares::IndexBufferSetHandle SkyboxIBHandle = {}; + Hardwares::IndexBufferSetHandle GridIBHandle = {}; + + Core::Containers::Array WriteOnceControl = {}; + + virtual void Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) override; + virtual void Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) override; + virtual void Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; + }; + struct InitialPass : public IRenderGraphCallbackPass { Core::Containers::Array VertexData = {}; @@ -21,55 +40,45 @@ namespace ZEngine::Rendering::Renderers virtual void Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) override; virtual void Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, 
RenderPasses::RenderPass** const output_pass) override; - virtual void Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; + virtual void Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; }; struct DepthPrePass : public IRenderGraphCallbackPass { virtual void Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) override; virtual void Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) override; - virtual void Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; + virtual void Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; }; struct SkyboxPass : public IRenderGraphCallbackPass { virtual void Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) override; virtual void Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const 
scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) override; - virtual void Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; + virtual void Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; private: - Hardwares::VertexBufferSetHandle m_vb_handle = {}; - Hardwares::IndexBufferSetHandle m_ib_handle = {}; - Textures::TextureHandle m_env_map = {}; - Core::Containers::Array m_index_data = {}; - Core::Containers::Array m_vertex_data = {}; + Textures::TextureHandle m_env_map = {}; }; struct GridPass : public IRenderGraphCallbackPass { virtual void Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) override; virtual void Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) override; - virtual void Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; - - private: - Core::Containers::Array m_index_data = {}; - Core::Containers::Array m_vertex_data = {}; - Hardwares::VertexBufferSetHandle m_vb_handle = {}; - Hardwares::IndexBufferSetHandle m_ib_handle = {}; + virtual void 
Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; }; struct GbufferPass : public IRenderGraphCallbackPass { virtual void Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) override; virtual void Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) override; - virtual void Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; + virtual void Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; }; struct LightingPass : public IRenderGraphCallbackPass { virtual void Setup(Hardwares::VulkanDevicePtr const device, cstring name, RenderGraphResourceBuilderPtr const res_builder, RenderGraphResourceInspectorPtr res_inspector) override; virtual void Compile(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPassBuilder* pass_builder, RenderGraphResourceInspectorPtr res_inspector, RenderPasses::RenderPass** const output_pass) override; - virtual void Execute(Hardwares::VulkanDevicePtr const device, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, 
Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; + virtual void Execute(Hardwares::VulkanDevicePtr const device, RenderGraphResourceInspectorPtr res_inspector, Rendering::Scenes::SceneDataPtr const scene, RenderPasses::RenderPass* const pass, Buffers::FramebufferVNext* const framebuffer, Hardwares::CommandBufferPtr const command_buffer) override; }; } // namespace ZEngine::Rendering::Renderers \ No newline at end of file diff --git a/ZEngine/ZEngine/Rendering/Scenes/GraphicScene.cpp b/ZEngine/ZEngine/Rendering/Scenes/GraphicScene.cpp index 0ec1dad1..e09d6687 100644 --- a/ZEngine/ZEngine/Rendering/Scenes/GraphicScene.cpp +++ b/ZEngine/ZEngine/Rendering/Scenes/GraphicScene.cpp @@ -209,7 +209,7 @@ namespace ZEngine::Rendering::Scenes if (!std::string_view(mat_files.AlbedoTexture).empty()) { - auto handle = async_loader->LoadTextureFile(mat_files.AlbedoTexture); + auto handle = async_loader->Submit(0, 0, {.TextureUpload = {.Filename = mat_files.AlbedoTexture}}); if (handle) { mat.AlbedoMap = handle.Index; @@ -218,7 +218,7 @@ namespace ZEngine::Rendering::Scenes if (!std::string_view(mat_files.EmissiveTexture).empty()) { - auto handle = async_loader->LoadTextureFile(mat_files.EmissiveTexture); + auto handle = async_loader->Submit(0, 0, {.TextureUpload = {.Filename = mat_files.EmissiveTexture}}); if (handle) { mat.EmissiveMap = handle.Index; @@ -227,7 +227,7 @@ namespace ZEngine::Rendering::Scenes if (!std::string_view(mat_files.NormalTexture).empty()) { - auto handle = async_loader->LoadTextureFile(mat_files.NormalTexture); + auto handle = async_loader->Submit(0, 0, {.TextureUpload = {.Filename = mat_files.NormalTexture}}); if (handle) { mat.NormalMap = handle.Index; @@ -236,7 +236,7 @@ namespace ZEngine::Rendering::Scenes if (!std::string_view(mat_files.OpacityTexture).empty()) { - auto handle = async_loader->LoadTextureFile(mat_files.OpacityTexture); + auto handle = async_loader->Submit(0, 0, {.TextureUpload = 
{.Filename = mat_files.OpacityTexture}}); if (handle) { mat.OpacityMap = handle.Index; @@ -245,7 +245,7 @@ namespace ZEngine::Rendering::Scenes if (!std::string_view(mat_files.SpecularTexture).empty()) { - auto handle = async_loader->LoadTextureFile(mat_files.SpecularTexture); + auto handle = async_loader->Submit(0, 0, {.TextureUpload = {.Filename = mat_files.SpecularTexture}}); if (handle) { mat.SpecularMap = handle.Index; diff --git a/ZEngine/ZEngine/Rendering/Shaders/Shader.cpp b/ZEngine/ZEngine/Rendering/Shaders/Shader.cpp index 488cbd20..eeb9de38 100644 --- a/ZEngine/ZEngine/Rendering/Shaders/Shader.cpp +++ b/ZEngine/ZEngine/Rendering/Shaders/Shader.cpp @@ -248,13 +248,25 @@ namespace ZEngine::Rendering::Shaders if (m_device->ShaderReservedLayoutBindingSpecificationMap.contains(set)) { - const auto& binding_specifications = m_device->ShaderReservedLayoutBindingSpecificationMap[set]; - LayoutBindingSpecificationMap[set].init(m_device->Arena, binding_specifications.size(), binding_specifications.size()); + const auto& binding_specifications = m_device->ShaderReservedLayoutBindingSpecificationMap.at(set); + LayoutBindingSpecification binding_spec = {}; + for (size_t i = 0; i < binding_specifications.size(); ++i) + { + const auto& spec = binding_specifications[i]; + if (Helpers::secure_strcmp(spec.Name, SI_resource.name.c_str()) == 0) + { + binding_spec = spec; + break; + } + } - for (uint32_t i = 0; i < binding_specifications.size(); ++i) + if (LayoutBindingSpecificationMap[set].capacity() <= 0) { - LayoutBindingSpecificationMap[set][i] = binding_specifications[i]; + LayoutBindingSpecificationMap[set].init(m_device->Arena, 2); } + + LayoutBindingSpecificationMap[set].push(std::move(binding_spec)); + continue; } uint32_t binding = spirv_compiler->get_decoration(SI_resource.id, spv::DecorationBinding); @@ -273,6 +285,94 @@ namespace ZEngine::Rendering::Shaders LayoutBindingSpecificationMap[set].push(LayoutBindingSpecification{.Set = set, .Binding = binding, .Count 
= count, .Name = name_c_str, .DescriptorTypeValue = DescriptorType::COMBINED_IMAGE_SAMPLER, .Flags = ShaderStageFlags::FRAGMENT}); } + + for (const auto& SI_resource : fragment_resources.separate_images) + { + uint32_t set = spirv_compiler->get_decoration(SI_resource.id, spv::DecorationDescriptorSet); + + if (m_device->ShaderReservedLayoutBindingSpecificationMap.contains(set)) + { + const auto& binding_specifications = m_device->ShaderReservedLayoutBindingSpecificationMap.at(set); + LayoutBindingSpecification binding_spec = {}; + for (size_t i = 0; i < binding_specifications.size(); ++i) + { + const auto& spec = binding_specifications[i]; + if (Helpers::secure_strcmp(spec.Name, SI_resource.name.c_str()) == 0) + { + binding_spec = spec; + break; + } + } + + if (LayoutBindingSpecificationMap[set].capacity() <= 0) + { + LayoutBindingSpecificationMap[set].init(m_device->Arena, 2); + } + + LayoutBindingSpecificationMap[set].push(std::move(binding_spec)); + + continue; + } + uint32_t binding = spirv_compiler->get_decoration(SI_resource.id, spv::DecorationBinding); + + const auto& type = spirv_compiler->get_type(SI_resource.type_id); + + uint32_t count = std::min(type.array.empty() ? 
1 : type.array[0], 256u); + + if (LayoutBindingSpecificationMap[set].capacity() <= 0) + { + LayoutBindingSpecificationMap[set].init(m_device->Arena, 10); + } + auto name_c_size = (SI_resource.name.size() + 1u); + auto name_c_str = ZPushString(&LocalArena, name_c_size); + Helpers::secure_strcpy(name_c_str, name_c_size, SI_resource.name.c_str()); + + LayoutBindingSpecificationMap[set].push(LayoutBindingSpecification{.Set = set, .Binding = binding, .Count = count, .Name = name_c_str, .DescriptorTypeValue = DescriptorType::SAMPLED_IMAGE, .Flags = ShaderStageFlags::FRAGMENT}); + } + + for (const auto& SI_resource : fragment_resources.separate_samplers) + { + uint32_t set = spirv_compiler->get_decoration(SI_resource.id, spv::DecorationDescriptorSet); + + if (m_device->ShaderReservedLayoutBindingSpecificationMap.contains(set)) + { + const auto& binding_specifications = m_device->ShaderReservedLayoutBindingSpecificationMap.at(set); + LayoutBindingSpecification binding_spec = {}; + for (size_t i = 0; i < binding_specifications.size(); ++i) + { + const auto& spec = binding_specifications[i]; + if (Helpers::secure_strcmp(spec.Name, SI_resource.name.c_str()) == 0) + { + binding_spec = spec; + break; + } + } + + if (LayoutBindingSpecificationMap[set].capacity() <= 0) + { + LayoutBindingSpecificationMap[set].init(m_device->Arena, 2); + } + + LayoutBindingSpecificationMap[set].push(std::move(binding_spec)); + + continue; + } + uint32_t binding = spirv_compiler->get_decoration(SI_resource.id, spv::DecorationBinding); + + const auto& type = spirv_compiler->get_type(SI_resource.type_id); + + uint32_t count = std::min(type.array.empty() ? 
1 : type.array[0], 256u); + + if (LayoutBindingSpecificationMap[set].capacity() <= 0) + { + LayoutBindingSpecificationMap[set].init(m_device->Arena, 10); + } + auto name_c_size = (SI_resource.name.size() + 1u); + auto name_c_str = ZPushString(&LocalArena, name_c_size); + Helpers::secure_strcpy(name_c_str, name_c_size, SI_resource.name.c_str()); + + LayoutBindingSpecificationMap[set].push(LayoutBindingSpecification{.Set = set, .Binding = binding, .Count = count, .Name = name_c_str, .DescriptorTypeValue = DescriptorType::SAMPLER, .Flags = ShaderStageFlags::FRAGMENT}); + } } } @@ -411,7 +511,7 @@ namespace ZEngine::Rendering::Shaders */ for (auto& pool_size : pool_size_collection) { - pool_size.descriptorCount *= m_device->SwapchainImageCount; + pool_size.descriptorCount *= m_device->SwapchainPtr->BufferredFrameCount; } /* * Create DescriptorPool @@ -426,7 +526,7 @@ namespace ZEngine::Rendering::Shaders VkDescriptorPoolCreateInfo pool_info = {}; pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; pool_info.flags = m_device->PhysicalDeviceSupportSampledImageBindless ? 
VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT : 0; - pool_info.maxSets = m_device->SwapchainImageCount; + pool_info.maxSets = m_device->SwapchainPtr->BufferredFrameCount; pool_info.poolSizeCount = pool_size_collection.size(); pool_info.pPoolSizes = pool_size_collection.data(); @@ -440,12 +540,12 @@ namespace ZEngine::Rendering::Shaders for (const auto layout : InternalDescriptorSetLayoutMap) { - DescriptorSetMap[layout.first].init(m_device->Arena, m_device->SwapchainImageCount, m_device->SwapchainImageCount); + DescriptorSetMap[layout.first].init(m_device->Arena, m_device->SwapchainPtr->BufferredFrameCount, m_device->SwapchainPtr->BufferredFrameCount); if (m_device->ShaderReservedDescriptorSetMap.contains(layout.first)) { // Since it's a Reserved Set, the Device already created the DescriptorSet - for (uint32_t i = 0; i < m_device->SwapchainImageCount; ++i) + for (uint32_t i = 0; i < m_device->SwapchainPtr->BufferredFrameCount; ++i) { DescriptorSetMap[layout.first][i] = m_device->ShaderReservedDescriptorSetMap.at(layout.first)[i]; } @@ -455,8 +555,8 @@ namespace ZEngine::Rendering::Shaders auto scratch = ZGetScratch(&LocalArena); Array layout_set = {}; - layout_set.init(scratch.Arena, m_device->SwapchainImageCount, m_device->SwapchainImageCount); - for (uint32_t i = 0; i < m_device->SwapchainImageCount; ++i) + layout_set.init(scratch.Arena, m_device->SwapchainPtr->BufferredFrameCount, m_device->SwapchainPtr->BufferredFrameCount); + for (uint32_t i = 0; i < m_device->SwapchainPtr->BufferredFrameCount; ++i) { layout_set[i] = layout.second; } @@ -464,7 +564,7 @@ namespace ZEngine::Rendering::Shaders VkDescriptorSetAllocateInfo descriptor_set_allocate_info = {}; descriptor_set_allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; descriptor_set_allocate_info.descriptorPool = m_descriptor_pool; - descriptor_set_allocate_info.descriptorSetCount = m_device->SwapchainImageCount; + descriptor_set_allocate_info.descriptorSetCount = 
m_device->SwapchainPtr->BufferredFrameCount; descriptor_set_allocate_info.pSetLayouts = layout_set.data(); ZENGINE_VALIDATE_ASSERT(vkAllocateDescriptorSets(m_device->LogicalDevice, &descriptor_set_allocate_info, DescriptorSetMap[layout.first].data()) == VK_SUCCESS, "Failed to create DescriptorSet") diff --git a/ZEngine/ZEngine/Windows/GameWindow.cpp b/ZEngine/ZEngine/Windows/GameWindow.cpp index f6630deb..de53341f 100644 --- a/ZEngine/ZEngine/Windows/GameWindow.cpp +++ b/ZEngine/ZEngine/Windows/GameWindow.cpp @@ -7,7 +7,6 @@ #include #ifdef _WIN32 -#define GLFW_EXPOSE_NATIVE_WIN32 #include #include diff --git a/ZEngine/ZEngine/ZEngineDef.h b/ZEngine/ZEngine/ZEngineDef.h index 981e99c7..597e0203 100644 --- a/ZEngine/ZEngine/ZEngineDef.h +++ b/ZEngine/ZEngine/ZEngineDef.h @@ -104,3 +104,18 @@ #define SCENE_FILE_VERSION MAKE_VERSION(1, 0, 0) typedef const char* cstring; + +#ifdef __cpp_lib_hardware_interference_size +#include +#include +constexpr auto CACHE_LINE_SIZE = std::hardware_destructive_interference_size; +#else +#include +constexpr auto CACHE_LINE_SIZE = 64; +#endif + +template +struct alignas(CACHE_LINE_SIZE) PaddedAtomic +{ + std::atomic value = {}; +};