Skip to content

Commit 45c29bf

Browse files
fixed gpu crash on intel driver
1 parent 31a852a commit 45c29bf

14 files changed

Lines changed: 470 additions & 252 deletions

ZEngine/ZEngine/Applications/AppRenderPipeline.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -44,21 +44,24 @@ namespace ZEngine::Applications
4444

4545
void AppRenderPipeline::BeginFrame()
4646
{
47-
auto swpachain = Device->SwapchainPtr;
47+
auto swapchain = Device->SwapchainPtr;
4848

49-
swpachain->AcquireNextImage(CurrentMailBoxBufferHead);
49+
swapchain->AcquireNextImage(CurrentMailBoxBufferHead);
5050

5151
for (uint8_t thread_idx = 0; thread_idx < Device->CommandBufferMgr->TotalThreadCount; ++thread_idx)
5252
{
53-
Device->CommandBufferMgr->ResetPool(swpachain->CurrentFrame->Index, thread_idx);
53+
Device->CommandBufferMgr->ResetPool(swapchain->CurrentFrame->Index, thread_idx);
54+
// Device->AsyncResLoader->ResetCommandBuffers(swapchain->CurrentFrame->Index, thread_idx);
5455
}
5556

57+
Device->AsyncResLoader->CompleteDeferrals();
58+
5659
// uint8_t render_worker_thread_idx = RenderThreadIndex + 1;
5760
// for (uint8_t worker_thread_idx = 0; worker_thread_idx < RenderWorkerThreadCount; ++worker_thread_idx)
5861
// {
5962
// auto thread_idx = render_worker_thread_idx + worker_thread_idx;
6063
// }
61-
CurrentCmdBuf = Device->CommandBufferMgr->GetCommandBuffer(Rendering::QueueType::GRAPHIC_QUEUE, swpachain->CurrentFrame->Index, RenderMainThreadIndex, 0, true);
64+
CurrentCmdBuf = Device->CommandBufferMgr->GetCommandBuffer(Rendering::QueueType::GRAPHIC_QUEUE, swapchain->CurrentFrame->Index, RenderMainThreadIndex, 0, true);
6265
}
6366

6467
void AppRenderPipeline::EndFrame()

ZEngine/ZEngine/Engine.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ namespace ZEngine
9898

9999
auto pipeline = g_app->RenderPipeline;
100100

101-
uint32_t head = pipeline->MailBoxBufferHead.value.load(std::memory_order_relaxed);
101+
uint32_t head = pipeline->MailBoxBufferHead.value.load(std::memory_order_acquire);
102102
uint32_t next = (head + 1) % pipeline->MaxMailBoxBufferCount;
103103
uint32_t tail = pipeline->MailBoxBufferTail.value.load(std::memory_order_acquire);
104104

@@ -150,7 +150,7 @@ namespace ZEngine
150150

151151
auto pipeline = g_app->RenderPipeline;
152152

153-
uint32_t tail = pipeline->MailBoxBufferTail.value.load(std::memory_order_relaxed);
153+
uint32_t tail = pipeline->MailBoxBufferTail.value.load(std::memory_order_acquire);
154154
uint32_t head = pipeline->MailBoxBufferHead.value.load(std::memory_order_acquire);
155155

156156
// Buffer empty

ZEngine/ZEngine/Hardwares/AsyncResourceLoader.cpp

Lines changed: 152 additions & 101 deletions
Large diffs are not rendered by default.

ZEngine/ZEngine/Hardwares/AsyncResourceLoader.h

Lines changed: 30 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ namespace ZEngine::Hardwares
99
struct VulkanDevice;
1010
struct AsyncGPUOperation;
1111
struct BufferView;
12+
struct CommandBuffer;
1213

1314
struct TextureFileRequest
1415
{
@@ -62,11 +63,30 @@ namespace ZEngine::Hardwares
6263
};
6364
};
6465

65-
std::atomic_uint64_t NextValue = 1;
66-
Rendering::Primitives::Semaphore* TextureTimeline = nullptr;
67-
Rendering::Primitives::Semaphore* BufferTimeline = nullptr;
68-
VulkanDevice* Device = nullptr;
69-
Core::Containers::Array<Core::Containers::Array<uint64_t>> RetireValues = {};
66+
struct TimelineJob
67+
{
68+
CommandBuffer* Buffer = nullptr;
69+
Rendering::Primitives::Semaphore* Timeline = nullptr;
70+
uint32_t WaitFlag = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
71+
uint64_t SignalValue = 0;
72+
};
73+
74+
struct DeferralUpload
75+
{
76+
UploadType UploadType;
77+
uint8_t FrameIdx = 0;
78+
uint8_t ThreadIdx = 0;
79+
unsigned char* Data = nullptr;
80+
Rendering::Textures::TextureHandle TexHandle = {};
81+
};
82+
83+
std::atomic_uint64_t NextValue = 1;
84+
std::atomic_uint32_t Counter = 0;
85+
Rendering::Primitives::Semaphore* Timeline = nullptr;
86+
VulkanDevice* Device = nullptr;
87+
Core::Containers::Array<Core::Containers::Array<uint64_t>> RetireValues = {};
88+
Helpers::ThreadSafeQueue<TimelineJob> AsyncTimelineJobQueue = {};
89+
Helpers::ThreadSafeQueue<DeferralUpload> DeferralUploadQueue = {};
7090

7191
void Initialize(VulkanDevice* device);
7292

@@ -77,8 +97,13 @@ namespace ZEngine::Hardwares
7797
Rendering::Textures::TextureHandle LoadTextureFile(cstring filename) = delete;
7898

7999
void Submit(UploadType type, uint8_t frame_index, uint8_t thread_index, const UploadRequest& request);
100+
void SubmitDeferral(DeferralUpload&& deferral);
80101
Rendering::Textures::TextureHandle Submit(uint8_t frame_index, uint8_t thread_index, const UploadRequest& request);
81102

103+
void CompleteDeferrals();
104+
void SubmitAsyncJobs();
105+
void ResetCommandBuffers(uint8_t frame_index, uint8_t thread_index);
106+
82107
void Run();
83108
void Shutdown();
84109

ZEngine/ZEngine/Hardwares/DeviceSwapchain.cpp

Lines changed: 46 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,7 @@ namespace ZEngine::Hardwares
300300
}
301301

302302
Device->CommandBufferMgr->EndEnqueuedBuffers();
303+
Device->AsyncResLoader->SubmitAsyncJobs();
303304

304305
auto scratch = ZGetScratch(&Arena);
305306

@@ -315,43 +316,73 @@ namespace ZEngine::Hardwares
315316
ZENGINE_VALIDATE_ASSERT(render_complete->GetState() != Rendering::Primitives::SemaphoreState::Submitted, "Signal semaphore is already in a signaled state.")
316317
ZENGINE_VALIDATE_ASSERT(CurrentFrame->Fence->GetState() != Rendering::Primitives::FenceState::Submitted, "Signal fence is already in a signaled state.")
317318

318-
Array<VkSemaphore> wait_semaphores = {};
319-
wait_semaphores.init(scratch.Arena, 10);
319+
struct TimelineAggregate
320+
{
321+
uint64_t MaxValue = 0;
322+
VkPipelineStageFlags StageMask = 0;
323+
};
320324

321-
HashMap<Primitives::Semaphore*, uint64_t> max_val_timeline_semaphores = {};
322-
Array<uint64_t> timeline_values = {};
325+
Array<VkSemaphore> wait_semaphores = {};
326+
Array<uint64_t> wait_values = {};
327+
HashMap<Primitives::Semaphore*, TimelineAggregate> max_val_timeline_semaphores = {};
328+
Array<VkPipelineStageFlags> stage_flags = {};
323329

330+
wait_semaphores.init(scratch.Arena, 5);
331+
stage_flags.init(scratch.Arena, 5);
332+
wait_values.init(scratch.Arena, 5);
324333
max_val_timeline_semaphores.init(scratch.Arena);
325-
timeline_values.init(scratch.Arena, 10);
334+
335+
wait_semaphores.push(CurrentFrame->Acquired->GetHandle());
336+
stage_flags.push(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
326337

327338
while (!Device->AsyncGPUOperations.Empty())
328339
{
329340
Hardwares::AsyncGPUOperationHandle op;
330341
if (Device->AsyncGPUOperations.Pop(op))
331342
{
332-
max_val_timeline_semaphores[op.Timeline] = std::max(max_val_timeline_semaphores[op.Timeline], op.SignalValue);
343+
if (!max_val_timeline_semaphores.contains(op.Timeline))
344+
{
345+
max_val_timeline_semaphores.insert(op.Timeline, {op.SignalValue, op.StageFlags});
346+
continue;
347+
}
348+
auto& val = max_val_timeline_semaphores[op.Timeline];
349+
val.MaxValue = std::max(val.MaxValue, op.SignalValue);
350+
val.StageMask |= op.StageFlags;
333351
}
334352
}
335353

336354
for (auto [sem, val] : max_val_timeline_semaphores)
337355
{
338356
wait_semaphores.push(sem->GetHandle());
339-
timeline_values.push(val);
357+
wait_values.push(val.MaxValue);
358+
stage_flags.push(val.StageMask);
340359
}
341360

342-
wait_semaphores.push(CurrentFrame->Acquired->GetHandle());
343-
timeline_values.push(0);
344-
345361
QueueView queue = Device->GetQueue(Rendering::QueueType::GRAPHIC_QUEUE);
346362
VkSemaphore signal_semaphores[] = {render_complete->GetHandle()};
347-
VkPipelineStageFlags stage_flags[] = {VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, /* we can adjust this later*/ VK_PIPELINE_STAGE_ALL_COMMANDS_BIT};
348-
VkSubmitInfo submit_info = {.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .waitSemaphoreCount = (uint32_t) wait_semaphores.size(), .pWaitSemaphores = wait_semaphores.data(), .pWaitDstStageMask = stage_flags, .commandBufferCount = (uint32_t) buffer.size(), .pCommandBuffers = buffer.data(), .signalSemaphoreCount = 1, .pSignalSemaphores = signal_semaphores};
349363

350-
VkTimelineSemaphoreSubmitInfo timeline_info = {.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .waitSemaphoreValueCount = (uint32_t) timeline_values.size(), .pWaitSemaphoreValues = timeline_values.data()};
364+
VkTimelineSemaphoreSubmitInfo timeline_info = {
365+
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
366+
.pNext = nullptr,
367+
.waitSemaphoreValueCount = (uint32_t) wait_values.size(),
368+
.pWaitSemaphoreValues = wait_values.data(),
369+
.signalSemaphoreValueCount = 0,
370+
.pSignalSemaphoreValues = nullptr,
371+
};
351372

352-
submit_info.pNext = &timeline_info;
373+
VkSubmitInfo submit_info = {
374+
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
375+
.pNext = &timeline_info,
376+
.waitSemaphoreCount = (uint32_t) wait_semaphores.size(),
377+
.pWaitSemaphores = wait_semaphores.data(),
378+
.pWaitDstStageMask = stage_flags.data(),
379+
.commandBufferCount = (uint32_t) buffer.size(),
380+
.pCommandBuffers = buffer.data(),
381+
.signalSemaphoreCount = 1,
382+
.pSignalSemaphores = signal_semaphores,
383+
};
353384

354-
auto submit = vkQueueSubmit(queue.Handle, 1, &(submit_info), CurrentFrame->Fence->GetHandle());
385+
auto submit = vkQueueSubmit(queue.Handle, 1, &(submit_info), CurrentFrame->Fence->GetHandle());
355386
ZENGINE_VALIDATE_ASSERT(submit == VK_SUCCESS, "Failed to submit queue")
356387

357388
ZReleaseScratch(scratch);

ZEngine/ZEngine/Hardwares/VulkanDevice.cpp

Lines changed: 19 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -626,45 +626,39 @@ namespace ZEngine::Hardwares
626626
Instance = VK_NULL_HANDLE;
627627
}
628628

629-
void VulkanDevice::QueueSubmit(CommandBuffer* const command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore, uint64_t signal_value, int wait_flag)
629+
void VulkanDevice::QueueSubmit(CommandBuffer* const command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore, uint64_t signal_value, uint32_t wait_flag)
630630
{
631-
ZENGINE_VALIDATE_ASSERT(signal_semaphore->GetState() != Rendering::Primitives::SemaphoreState::Submitted, "Signal semaphore is already in a signaled state.")
632631
ZENGINE_VALIDATE_ASSERT(command_buffer->GetState() == CommandBufferState::Executable, "Command buffer must be in executable state to be submitted.")
633632
ZENGINE_VALIDATE_ASSERT(signal_semaphore->IsTimeline == true, "Signal semaphore must be a timeline semaphore.")
634633

635-
auto flag = (command_buffer->QueueType == QueueType::GRAPHIC_QUEUE) ? VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT : VK_PIPELINE_STAGE_TRANSFER_BIT;
636-
637-
if (wait_flag != -1)
638-
{
639-
flag = VkPipelineStageFlagBits(wait_flag);
640-
}
641-
642-
VkCommandBuffer command_buffers[] = {command_buffer->GetHandle()};
643-
VkSemaphore semaphores[] = {signal_semaphore->GetHandle()};
644-
VkSubmitInfo submit_info = {
645-
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
646-
.pNext = nullptr,
647-
.waitSemaphoreCount = 0,
648-
.pWaitSemaphores = nullptr,
649-
.pWaitDstStageMask = nullptr,
650-
.commandBufferCount = 1,
651-
.pCommandBuffers = command_buffers,
652-
.signalSemaphoreCount = 1,
653-
.pSignalSemaphores = semaphores,
634+
VkPipelineStageFlags flag = (wait_flag == UINT32_MAX) ? 0 : VkPipelineStageFlagBits(wait_flag);
635+
636+
VkCommandBuffer command_buffers[] = {command_buffer->GetHandle()};
637+
VkSemaphore semaphores[] = {signal_semaphore->GetHandle()};
638+
VkSubmitInfo submit_info = {
639+
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
640+
.pNext = nullptr,
641+
.waitSemaphoreCount = 0,
642+
.pWaitSemaphores = nullptr,
643+
.pWaitDstStageMask = &flag,
644+
.commandBufferCount = 1,
645+
.pCommandBuffers = command_buffers,
646+
.signalSemaphoreCount = 1,
647+
.pSignalSemaphores = semaphores,
654648
};
655649

650+
uint64_t signal_values[] = {signal_value};
656651
VkTimelineSemaphoreSubmitInfo timeline_semaphore_submit_info = {
657652
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
658653
.pNext = nullptr,
659654
.waitSemaphoreValueCount = 0,
660655
.pWaitSemaphoreValues = nullptr,
661656
.signalSemaphoreValueCount = 1,
662-
.pSignalSemaphoreValues = &signal_value,
657+
.pSignalSemaphoreValues = signal_values,
663658
};
664659
submit_info.pNext = &timeline_semaphore_submit_info;
665660
ZENGINE_VALIDATE_ASSERT(vkQueueSubmit(GetQueue(command_buffer->QueueType).Handle, 1, &submit_info, VK_NULL_HANDLE) == VK_SUCCESS, "Failed to submit queue")
666661
command_buffer->SetState(CommandBufferState::Pending);
667-
signal_semaphore->SetState(SemaphoreState::Submitted);
668662
}
669663

670664
bool VulkanDevice::QueueSubmit(const VkPipelineStageFlags wait_stage_flag, CommandBuffer* command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore, Rendering::Primitives::Fence* const fence)
@@ -1215,6 +1209,7 @@ namespace ZEngine::Hardwares
12151209

12161210
if (idle_count < threshold)
12171211
{
1212+
std::this_thread::sleep_for(std::chrono::milliseconds(50));
12181213
continue;
12191214
}
12201215

@@ -1899,7 +1894,7 @@ namespace ZEngine::Hardwares
18991894
{
19001895
buffer->ResetState();
19011896
// Todo : We want to merge vkResetCommandBuffer with ResetState() when buffer is instant type
1902-
vkResetCommandBuffer(buffer->GetHandle(), 0);
1897+
// vkResetCommandBuffer(buffer->GetHandle(), VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
19031898
buffer->Begin();
19041899
}
19051900
return buffer;

ZEngine/ZEngine/Hardwares/VulkanDevice.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -565,6 +565,7 @@ namespace ZEngine::Hardwares
565565
*/
566566
struct AsyncGPUOperationHandle
567567
{
568+
uint32_t StageFlags = 0;
568569
uint64_t SignalValue = 0;
569570
Rendering::Primitives::Semaphore* Timeline = nullptr;
570571
};
@@ -641,7 +642,7 @@ namespace ZEngine::Hardwares
641642
void Initialize(ZEngine::Core::Memory::ArenaAllocator* arena, Windows::CoreWindow* const window, uint32_t worker_thread_count);
642643
void Deinitialize();
643644
void Dispose();
644-
void QueueSubmit(CommandBuffer* const command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore, uint64_t signal_value, int wait_flag = -1);
645+
void QueueSubmit(CommandBuffer* const command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore, uint64_t signal_value, uint32_t wait_flag = UINT32_MAX);
645646
bool QueueSubmit(const VkPipelineStageFlags wait_stage_flag, CommandBuffer* const command_buffer, Rendering::Primitives::Semaphore* const signal_semaphore = nullptr, Rendering::Primitives::Fence* const fence = nullptr);
646647
void EnqueueAsyncGPUOperation(const AsyncGPUOperationHandle& handle);
647648
void EnqueueForDeletion(Rendering::DeviceResourceType resource_type, void* const resource_handle);

ZEngine/ZEngine/Rendering/Primitives/Semaphore.cpp

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,16 +5,17 @@ namespace ZEngine::Rendering::Primitives
55
{
66
Semaphore::Semaphore(Hardwares::VulkanDevice* const device, bool is_timeline)
77
{
8-
IsTimeline = is_timeline;
9-
Device = device;
10-
VkSemaphoreTypeCreateInfo timeline_create_info = {};
11-
timeline_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
12-
timeline_create_info.semaphoreType = is_timeline ? VK_SEMAPHORE_TYPE_TIMELINE : VK_SEMAPHORE_TYPE_BINARY;
13-
timeline_create_info.initialValue = 0;
8+
IsTimeline = is_timeline;
9+
Device = device;
10+
VkSemaphoreTypeCreateInfo timeline_create_info = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO};
1411

15-
VkSemaphoreCreateInfo semaphore_create_info = {};
16-
semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
17-
semaphore_create_info.pNext = &timeline_create_info;
12+
VkSemaphoreCreateInfo semaphore_create_info = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
13+
if (is_timeline)
14+
{
15+
timeline_create_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
16+
timeline_create_info.initialValue = 0;
17+
semaphore_create_info.pNext = &timeline_create_info;
18+
}
1819

1920
ZENGINE_VALIDATE_ASSERT(vkCreateSemaphore(Device->LogicalDevice, &semaphore_create_info, nullptr, &m_handle) == VK_SUCCESS, "Failed to create Semaphore")
2021
}

0 commit comments

Comments
 (0)