diff --git a/include/API/Device.h b/include/API/Device.h
index f99271233..6cdfcba7b 100644
--- a/include/API/Device.h
+++ b/include/API/Device.h
@@ -56,6 +56,24 @@ class Buffer {
   Buffer() = default;
 };
 
+/// Abstract GPU fence. Each backend wraps its native primitive (a D3D12
+/// fence, a Metal shared event, or a Vulkan timeline semaphore).
+class Fence {
+public:
+  virtual ~Fence() = default;
+
+  Fence(const Fence &) = delete;
+  Fence &operator=(const Fence &) = delete;
+
+  /// Returns the value the fence has currently reached.
+  virtual uint64_t getFenceValue() = 0;
+  /// Blocks the calling thread until the fence reaches \p SignalValue.
+  virtual llvm::Error waitForCompletion(uint64_t SignalValue) = 0;
+
+protected:
+  Fence() = default;
+};
+
 class Queue {
 public:
   virtual ~Queue() = 0;
@@ -77,3 +95,7 @@ class Device {
 
   virtual Queue &getGraphicsQueue() = 0;
 
+  /// Creates a fence using this device's backend.
+  virtual llvm::Expected<std::unique_ptr<Fence>>
+  createFence(llvm::StringRef Name) = 0;
+
diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp
index 2eec8b777..14046aa1e 100644
--- a/lib/API/DX/Device.cpp
+++ b/lib/API/DX/Device.cpp
@@ -293,5 +293,98 @@ class DXBuffer : public offloadtest::Buffer {
       : Buffer(Buffer), Name(Name), Desc(Desc), SizeInBytes(SizeInBytes) {}
 };
 
+/// D3D12 implementation of offloadtest::Fence. Pairs an ID3D12Fence with an OS
+/// event (a Win32 event handle, or an eventfd under WSL) used to block on it.
+class DXFence : public offloadtest::Fence {
+public:
+#ifdef _WIN32
+  DXFence(ComPtr<ID3D12Fence> Fence, HANDLE Event, llvm::StringRef Name)
+#else // WSL
+  DXFence(ComPtr<ID3D12Fence> Fence, int Event, llvm::StringRef Name)
+#endif
+      : Name(Name), Fence(Fence), Event(Event) {
+  }
+
+  std::string Name;
+  ComPtr<ID3D12Fence> Fence;
+#ifdef _WIN32
+  HANDLE Event;
+#else // WSL
+  int Event;
+#endif
+
+  /// Creates the D3D12 fence (initial value 0) and the OS event used to wait
+  /// on it.
+  static llvm::Expected<std::unique_ptr<DXFence>> create(ID3D12Device *Device,
+                                                         llvm::StringRef Name) {
+    ComPtr<ID3D12Fence> Fence;
+    if (auto Err = HR::toError(
+            Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&Fence)),
+            "Failed to create Fence."))
+      return Err;
+
+#ifdef _WIN32
+    HANDLE Event = CreateEventA(nullptr, false, false, nullptr);
+    if (!Event)
+#else // WSL
+    int Event = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+    if (Event == -1)
+#endif
+      return llvm::createStringError(std::errc::device_or_resource_busy,
+                                     "Failed to create event.");
+
+    return std::make_unique<DXFence>(Fence, Event, Name);
+  }
+
+  ~DXFence() override {
+#ifdef _WIN32
+    CloseHandle(Event);
+#else // WSL
+    close(Event);
+#endif
+  }
+
+  uint64_t getFenceValue() override { return Fence->GetCompletedValue(); }
+
+  /// Blocks until the fence reaches \p SignalValue. Returns immediately when
+  /// the fence has already reached it.
+  llvm::Error waitForCompletion(uint64_t SignalValue) override {
+    if (Fence->GetCompletedValue() >= SignalValue)
+      return llvm::Error::success();
+
+#ifdef _WIN32
+    if (auto Err = HR::toError(Fence->SetEventOnCompletion(SignalValue, Event),
+                               "Failed to register end event."))
+      return Err;
+    // An INFINITE wait cannot time out, so any result other than
+    // WAIT_OBJECT_0 means the wait itself failed.
+    if (WaitForSingleObject(Event, INFINITE) != WAIT_OBJECT_0)
+      return llvm::createStringError(std::errc::device_or_resource_busy,
+                                     "Failed to wait on fence event.");
+#else // WSL
+    if (auto Err =
+            HR::toError(Fence->SetEventOnCompletion(
+                            SignalValue, reinterpret_cast<HANDLE>(Event)),
+                        "Failed to register end event."))
+      return Err;
+    pollfd PollEvent;
+    PollEvent.fd = Event;
+    PollEvent.events = POLLIN;
+    PollEvent.revents = 0;
+    if (poll(&PollEvent, 1, -1) == -1)
+      return llvm::createStringError(
+          std::error_code(errno, std::system_category()), strerror(errno));
+    // Drain the eventfd counter. It stays readable until it is read, so
+    // without this every later wait on this fence would return from poll()
+    // immediately, before the GPU has reached the requested value.
+    uint64_t Signaled = 0;
+    if (read(Event, &Signaled, sizeof(Signaled)) == -1 && errno != EAGAIN)
+      return llvm::createStringError(
+          std::error_code(errno, std::system_category()), strerror(errno));
+#endif
+    return llvm::Error::success();
+  }
+};
+
 class DXQueue : public offloadtest::Queue {
 public:
@@ -349,8 +442,3 @@ class DXDevice : public offloadtest::Device {
-    ComPtr<ID3D12Fence> Fence;
-#ifdef _WIN32
-    HANDLE Event;
-#else // WSL
-    int Event;
-#endif
+    std::unique_ptr<offloadtest::Fence> Fence;
 
     // Resources for graphics pipelines.
@@ -378,3 +466,8 @@ class DXDevice : public offloadtest::Device {
 
   Queue &getGraphicsQueue() override { return GraphicsQueue; }
 
+  llvm::Expected<std::unique_ptr<offloadtest::Fence>>
+  createFence(llvm::StringRef Name) override {
+    return DXFence::create(Device.Get(), Name);
+  }
+
@@ -1136,56 +1229,20 @@ class DXDevice : public offloadtest::Device {
     IS.CmdList->ResourceBarrier(1, &Barrier);
   }
 
-  llvm::Error createEvent(InvocationState &IS) {
-    if (auto Err = HR::toError(Device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
-                                                   IID_PPV_ARGS(&IS.Fence)),
-                               "Failed to create fence."))
-      return Err;
-#ifdef _WIN32
-    IS.Event = CreateEventA(nullptr, false, false, nullptr);
-    if (!IS.Event)
-#else // WSL
-    IS.Event = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
-    if (IS.Event == -1)
-#endif
-      return llvm::createStringError(std::errc::device_or_resource_busy,
-                                     "Failed to create event.");
-    return llvm::Error::success();
-  }
-
   llvm::Error waitForSignal(InvocationState &IS) {
     // This is a hack but it works since this is all single threaded code.
     static uint64_t FenceCounter = 0;
     const uint64_t CurrentCounter = FenceCounter + 1;
+    auto *F = static_cast<DXFence *>(IS.Fence.get());
+
     if (auto Err = HR::toError(
-            GraphicsQueue.Queue->Signal(IS.Fence.Get(), CurrentCounter),
+            GraphicsQueue.Queue->Signal(F->Fence.Get(), CurrentCounter),
             "Failed to add signal."))
       return Err;
 
-    if (IS.Fence->GetCompletedValue() < CurrentCounter) {
-#ifdef _WIN32
-      HANDLE Event = IS.Event;
-#else // WSL
-      HANDLE Event = reinterpret_cast<HANDLE>(IS.Event);
-#endif
-      if (auto Err =
-              HR::toError(IS.Fence->SetEventOnCompletion(CurrentCounter, Event),
-                          "Failed to register end event."))
-        return Err;
+    if (auto Err = IS.Fence->waitForCompletion(CurrentCounter))
+      return Err;
 
-#ifdef _WIN32
-      WaitForSingleObject(IS.Event, INFINITE);
-#else // WSL
-      pollfd PollEvent;
-      PollEvent.fd = IS.Event;
-      PollEvent.events = POLLIN;
-      PollEvent.revents = 0;
-      if (poll(&PollEvent, 1, -1) == -1)
-        return llvm::createStringError(
-            std::error_code(errno, std::system_category()), strerror(errno));
-#endif
-    }
     FenceCounter = CurrentCounter;
     return llvm::Error::success();
   }
@@ -1690,9 +1747,10 @@ class DXDevice : public offloadtest::Device {
       return Err;
     llvm::outs() << "Command structures created.\n";
 
-    if (auto Err = createEvent(State))
-      return Err;
-    llvm::outs() << "Event prepared.\n";
+    auto FenceOrErr = createFence("Fence");
+    if (!FenceOrErr)
+      return FenceOrErr.takeError();
+    State.Fence = std::move(*FenceOrErr);
 
     if (auto Err = createBuffers(P, State))
       return Err;
diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp
index 0c26f7c7a..dad8a6d04 100644
--- a/lib/API/MTL/MTLDevice.cpp
+++ b/lib/API/MTL/MTLDevice.cpp
@@ -83,6 +83,41 @@ class MTLQueue : public offloadtest::Queue {
   }
 };
 
+/// Metal implementation of offloadtest::Fence backed by an MTL::SharedEvent,
+/// which is released when the fence is destroyed.
+class MTLFence : public offloadtest::Fence {
+public:
+  MTLFence(MTL::SharedEvent *Event, llvm::StringRef Name)
+      : Name(Name), Event(Event) {}
+  std::string Name;
+  MTL::SharedEvent *Event;
+
+  static llvm::Expected<std::unique_ptr<offloadtest::Fence>>
+  create(MTL::Device *Device, llvm::StringRef Name) {
+    MTL::SharedEvent *Event = Device->newSharedEvent();
+    if (!Event)
+      return llvm::createStringError(std::errc::device_or_resource_busy,
+                                     "Failed to create shared event.");
+    return std::make_unique<MTLFence>(Event, Name);
+  }
+
+  ~MTLFence() override {
+    if (Event)
+      Event->release();
+  }
+
+  uint64_t getFenceValue() override { return Event->signaledValue(); }
+
+  /// Waits on the shared event for \p SignalValue; a false result from the
+  /// wait is reported as a timeout error.
+  llvm::Error waitForCompletion(uint64_t SignalValue) override {
+    if (!Event->waitUntilSignaledValue(SignalValue, UINT64_MAX))
+      return llvm::createStringError(std::errc::timed_out,
+                                     "Timed out waiting on shared event.");
+    return llvm::Error::success();
+  }
+};
+
 class MTLBuffer : public offloadtest::Buffer {
 public:
   MTL::Buffer *Buf;
@@ -131,5 +166,6 @@ class MTLDevice : public offloadtest::Device {
     MTL::Texture *FrameBufferTexture = nullptr;
     MTL::CommandBuffer *CmdBuffer = nullptr;
+    std::unique_ptr<offloadtest::Fence> Fence;
   };
 
   llvm::Error setupVertexShader(InvocationState &IS, const Pipeline &P,
@@ -488,14 +524,26 @@ class MTLDevice : public offloadtest::Device {
   }
 
   llvm::Error executeCommands(InvocationState &IS) {
+    // This is a hack but it works since this is all single threaded code.
+    static uint64_t FenceCounter = 0;
+    const uint64_t CurrentCounter = FenceCounter + 1;
+    auto *F = static_cast<MTLFence *>(IS.Fence.get());
+
+    IS.CmdBuffer->encodeSignalEvent(F->Event, CurrentCounter);
     IS.CmdBuffer->commit();
-    IS.CmdBuffer->waitUntilCompleted();
+
+    if (auto Err = IS.Fence->waitForCompletion(CurrentCounter))
+      return Err;
 
+    // NOTE(review): waitUntilCompleted() used to guarantee the command buffer
+    // had finished before error() was read; confirm the shared-event signal
+    // gives the same guarantee.
     // Check and surface any errors that occurred during execution.
     NS::Error *CBErr = IS.CmdBuffer->error();
     if (CBErr)
       return toError(CBErr);
 
+    FenceCounter = CurrentCounter;
     return llvm::Error::success();
   }
 
@@ -565,3 +613,8 @@ class MTLDevice : public offloadtest::Device {
 
   Queue &getGraphicsQueue() override { return GraphicsQueue; }
 
+  llvm::Expected<std::unique_ptr<offloadtest::Fence>>
+  createFence(llvm::StringRef Name) override {
+    return MTLFence::create(Device, Name);
+  }
+
@@ -589,6 +642,11 @@ class MTLDevice : public offloadtest::Device {
   llvm::Error executeProgram(Pipeline &P) override {
     InvocationState IS;
 
+    auto FenceOrErr = createFence("Fence");
+    if (!FenceOrErr)
+      return FenceOrErr.takeError();
+    IS.Fence = std::move(*FenceOrErr);
+
     if (auto Err = createBuffers(P, IS))
       return Err;
 
diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp
index 5c78b8afa..81df567bb 100644
--- a/lib/API/VK/Device.cpp
+++ b/lib/API/VK/Device.cpp
@@ -391,6 +391,62 @@ class VulkanBuffer : public offloadtest::Buffer {
   }
 };
 
+/// Vulkan implementation of offloadtest::Fence backed by a timeline semaphore
+/// (VK_SEMAPHORE_TYPE_TIMELINE), destroyed together with the fence object.
+class VulkanFence : public offloadtest::Fence {
+public:
+  VulkanFence(VkDevice Device, VkSemaphore Semaphore, llvm::StringRef Name)
+      : Name(Name), Device(Device), Semaphore(Semaphore) {}
+
+  std::string Name;
+  VkDevice Device;
+  VkSemaphore Semaphore;
+
+  static llvm::Expected<std::unique_ptr<offloadtest::Fence>>
+  create(VkDevice Device, llvm::StringRef Name) {
+    VkSemaphoreTypeCreateInfo TypeCreateInfo = {};
+    TypeCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
+    TypeCreateInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
+
+    VkSemaphoreCreateInfo CreateInfo = {};
+    CreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    CreateInfo.pNext = &TypeCreateInfo;
+
+    VkSemaphore Semaphore = VK_NULL_HANDLE;
+    if (vkCreateSemaphore(Device, &CreateInfo, nullptr, &Semaphore))
+      return llvm::createStringError(std::errc::device_or_resource_busy,
+                                     "Failed to create Semaphore.");
+
+    return std::make_unique<VulkanFence>(Device, Semaphore, Name);
+  }
+
+  ~VulkanFence() override { vkDestroySemaphore(Device, Semaphore, nullptr); }
+
+  uint64_t getFenceValue() override {
+    uint64_t Value = 0;
+    [[maybe_unused]] const VkResult Ret =
+        vkGetSemaphoreCounterValue(Device, Semaphore, &Value);
+    assert(!Ret && "vkGetSemaphoreCounterValue failed but should never fail.");
+    return Value;
+  }
+
+  /// Waits in vkWaitSemaphores() for \p SignalValue with a UINT64_MAX
+  /// timeout; any non-zero result is reported as an error.
+  llvm::Error waitForCompletion(uint64_t SignalValue) override {
+    VkSemaphoreWaitInfo WaitInfo = {};
+    WaitInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO;
+    WaitInfo.semaphoreCount = 1;
+    WaitInfo.pSemaphores = &Semaphore;
+    WaitInfo.pValues = &SignalValue;
+
+    if (vkWaitSemaphores(Device, &WaitInfo, UINT64_MAX))
+      return llvm::createStringError(std::errc::device_or_resource_busy,
+                                     "Failed to wait on Semaphore.");
+
+    return llvm::Error::success();
+  }
+};
+
 class VulkanQueue : public offloadtest::Queue {
 public:
   VkQueue Queue = VK_NULL_HANDLE;
@@ -487,6 +543,8 @@ class VulkanDevice : public offloadtest::Device {
     VkPipelineCache PipelineCache = VK_NULL_HANDLE;
     VkPipeline Pipeline = VK_NULL_HANDLE;
 
+    std::unique_ptr<offloadtest::Fence> Fence;
+
     // FrameBuffer associated data for offscreen rendering.
     VkFramebuffer FrameBuffer = VK_NULL_HANDLE;
     ResourceBundle FrameBufferResource = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0,
@@ -649,3 +707,8 @@ class VulkanDevice : public offloadtest::Device {
 
   Queue &getGraphicsQueue() override { return GraphicsQueue; }
 
+  llvm::Expected<std::unique_ptr<offloadtest::Fence>>
+  createFence(llvm::StringRef Name) override {
+    return VulkanFence::create(Device, Name);
+  }
+
@@ -1155,31 +1218,40 @@ class VulkanDevice : public offloadtest::Device {
   }
 
   llvm::Error executeCommandBuffer(InvocationState &IS) {
+    // This is a hack but it works since this is all single threaded code.
+    static uint64_t FenceCounter = 0;
+    const uint64_t CurrentCounter = FenceCounter + 1;
+
     if (vkEndCommandBuffer(IS.CmdBuffer))
       return llvm::createStringError(std::errc::device_or_resource_busy,
                                      "Could not end command buffer.");
 
+    auto *F = static_cast<VulkanFence *>(IS.Fence.get());
+
+    VkTimelineSemaphoreSubmitInfo TimelineSubmitInfo = {};
+    TimelineSubmitInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
+    TimelineSubmitInfo.signalSemaphoreValueCount = 1;
+    TimelineSubmitInfo.pSignalSemaphoreValues = &CurrentCounter;
+
     VkSubmitInfo SubmitInfo = {};
     SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    SubmitInfo.pNext = &TimelineSubmitInfo;
     SubmitInfo.commandBufferCount = 1;
     SubmitInfo.pCommandBuffers = &IS.CmdBuffer;
-    VkFenceCreateInfo FenceInfo = {};
-    FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-    VkFence Fence;
-    if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence))
-      return llvm::createStringError(std::errc::device_or_resource_busy,
-                                     "Could not create fence.");
+    SubmitInfo.signalSemaphoreCount = 1;
+    SubmitInfo.pSignalSemaphores = &F->Semaphore;
 
     // Submit to the queue
-    if (vkQueueSubmit(GraphicsQueue.Queue, 1, &SubmitInfo, Fence))
+    if (vkQueueSubmit(GraphicsQueue.Queue, 1, &SubmitInfo, VK_NULL_HANDLE))
       return llvm::createStringError(std::errc::device_or_resource_busy,
                                      "Failed to submit to queue.");
 
-    if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX))
-      return llvm::createStringError(std::errc::device_or_resource_busy,
-                                     "Failed waiting for fence.");
-    vkDestroyFence(Device, Fence, nullptr);
+    if (auto Err = IS.Fence->waitForCompletion(CurrentCounter))
+      return Err;
+
     vkFreeCommandBuffers(Device, IS.CmdPool, 1, &IS.CmdBuffer);
+
+    FenceCounter = CurrentCounter;
     return llvm::Error::success();
   }
 
@@ -2327,6 +2399,12 @@ class VulkanDevice : public offloadtest::Device {
 
     if (auto Err = createDevice(State))
       return Err;
+
+    auto FenceOrErr = createFence("Fence");
+    if (!FenceOrErr)
+      return FenceOrErr.takeError();
+    State.Fence = std::move(*FenceOrErr);
+
     llvm::outs() << "Physical device created.\n";
     if (auto Err = createShaderModules(P, State))
       return Err;