Skip to content

Commit c010c87

Browse files
MarijnS95 and claude committed
Add Queue::submit() for command buffer submission
Move command buffer submission logic from each backend's Device into Queue::submit(), which takes ownership of the command buffers. For now it blocks internally until completion; a TODO marks that it will return a Fence once the Fence abstraction from PR llvm#1007 is available.

- Metal: commit + waitUntilCompleted + error check
- Vulkan: vkEndCommandBuffer + temporary fence + vkQueueSubmit + wait
- DX12: CmdList Close + ExecuteCommandLists + fence signal/wait

VulkanQueue now stores a VkDevice handle (with a TODO for lifetime management) so it can create/destroy fences independently.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1bff4b9 commit c010c87

4 files changed

Lines changed: 146 additions & 58 deletions

File tree

include/API/Device.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,19 @@ class Queue {
6262
public:
6363
virtual ~Queue() = 0;
6464

65+
/// Submit command buffers for execution and block until completion.
66+
// TODO: Return a Fence instead of blocking, once the Fence abstraction
67+
// from PR #1007 is available.
68+
virtual llvm::Error
69+
submit(llvm::SmallVectorImpl<std::unique_ptr<CommandBuffer>> &CBs) = 0;
70+
71+
/// Convenience overload for submitting a single command buffer.
72+
llvm::Error submit(std::unique_ptr<CommandBuffer> CB) {
73+
llvm::SmallVector<std::unique_ptr<CommandBuffer>, 1> CBs;
74+
CBs.push_back(std::move(CB));
75+
return submit(CBs);
76+
}
77+
6578
protected:
6679
Queue() = default;
6780
};

lib/API/DX/Device.cpp

Lines changed: 64 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -292,22 +292,28 @@ class DXBuffer : public offloadtest::Buffer {
292292

293293
class DXQueue : public offloadtest::Queue {
294294
public:
295+
using Queue::submit;
296+
295297
ComPtr<ID3D12CommandQueue> Queue;
296298

297299
DXQueue(ComPtr<ID3D12CommandQueue> Queue) : Queue(Queue) {}
298-
virtual ~DXQueue() {}
300+
~DXQueue() override {}
299301

300302
static llvm::Expected<DXQueue>
301303
createGraphicsQueue(ComPtr<ID3D12Device> Device) {
302304
const D3D12_COMMAND_QUEUE_DESC Desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, 0,
303305
D3D12_COMMAND_QUEUE_FLAG_NONE, 0};
304-
ComPtr<ID3D12CommandQueue> Queue;
306+
ComPtr<ID3D12CommandQueue> CmdQueue;
305307
if (auto Err =
306-
HR::toError(Device->CreateCommandQueue(&Desc, IID_PPV_ARGS(&Queue)),
308+
HR::toError(Device->CreateCommandQueue(&Desc, IID_PPV_ARGS(&CmdQueue)),
307309
"Failed to create command queue."))
308310
return Err;
309-
return DXQueue(Queue);
311+
return DXQueue(CmdQueue);
310312
}
313+
314+
llvm::Error submit(
315+
llvm::SmallVectorImpl<std::unique_ptr<offloadtest::CommandBuffer>> &CBs)
316+
override;
311317
};
312318

313319
class DXCommandBuffer : public offloadtest::CommandBuffer {
@@ -428,6 +434,53 @@ DXCommandBuffer::createComputeEncoder(offloadtest::EncoderMode Mode) {
428434
return std::make_unique<DXComputeEncoder>(*this, Mode);
429435
}
430436

437+
/// Close, execute and wait for every command buffer in \p CBs, in order.
///
/// Each command buffer is submitted on its own: its command list is closed and
/// handed to the queue, the buffer's fence is signalled from the queue, and the
/// call blocks until that fence reaches the signalled value.
///
/// \param CBs Command buffers to submit; each must be a DXCommandBuffer.
/// \returns The first error encountered (later buffers are then not
///          submitted), or success once every buffer has completed.
llvm::Error DXQueue::submit(
    llvm::SmallVectorImpl<std::unique_ptr<offloadtest::CommandBuffer>> &CBs) {
  for (auto &CB : CBs) {
    auto &DCB = CB->as<DXCommandBuffer>();
    if (auto Err =
            HR::toError(DCB.CmdList->Close(), "Failed to close command list."))
      return Err;

    ID3D12CommandList *const CmdLists[] = {DCB.CmdList.Get()};
    Queue->ExecuteCommandLists(1, CmdLists);

    // Derive the value to signal from the fence itself rather than from a
    // function-local static counter. The same per-command-buffer fence is also
    // signalled by DXDevice::waitForSignal, which keeps its own counter; a
    // separate counter here could lag behind the fence's current value, in
    // which case the completion check below would pass immediately and this
    // function would return before the GPU had finished.
    // NOTE(review): this relies on every earlier signal on this fence having
    // already been waited for, as the blocking code paths visible here do.
    const uint64_t CurrentCounter = DCB.Fence->GetCompletedValue() + 1;
    if (auto Err = HR::toError(Queue->Signal(DCB.Fence.Get(), CurrentCounter),
                               "Failed to add signal."))
      return Err;

    // Only block if the GPU has not already passed the signal.
    if (DCB.Fence->GetCompletedValue() < CurrentCounter) {
#ifdef _WIN32
      HANDLE Event = DCB.Event;
#else // WSL
      HANDLE Event = reinterpret_cast<HANDLE>(DCB.Event);
#endif
      if (auto Err = HR::toError(
              DCB.Fence->SetEventOnCompletion(CurrentCounter, Event),
              "Failed to register end event."))
        return Err;

#ifdef _WIN32
      WaitForSingleObject(DCB.Event, INFINITE);
#else // WSL
      // Under WSL the event is a file descriptor; wait on it with poll().
      pollfd PollEvent;
      PollEvent.fd = DCB.Event;
      PollEvent.events = POLLIN;
      PollEvent.revents = 0;
      if (poll(&PollEvent, 1, -1) == -1)
        return llvm::createStringError(
            std::error_code(errno, std::system_category()), strerror(errno));
#endif
    }
  }
  return llvm::Error::success();
}
483+
431484
class DXDevice : public offloadtest::Device {
432485
private:
433486
ComPtr<IDXCoreAdapter> Adapter;
@@ -1234,8 +1287,10 @@ class DXDevice : public offloadtest::Device {
12341287
IS.CB->CmdList->ResourceBarrier(1, &Barrier);
12351288
}
12361289

1290+
// waitForSignal is used for tile mapping synchronization, not command buffer
1291+
// submission. TODO: Replace with a proper fence abstraction.
12371292
llvm::Error waitForSignal(InvocationState &IS) {
1238-
// This is a hack but it works since this is all single threaded code.
1293+
// Reuse the command buffer's fence for a quick queue-level signal/wait.
12391294
static uint64_t FenceCounter = 0;
12401295
const uint64_t CurrentCounter = FenceCounter + 1;
12411296

@@ -1250,9 +1305,9 @@ class DXDevice : public offloadtest::Device {
12501305
#else // WSL
12511306
HANDLE Event = reinterpret_cast<HANDLE>(IS.CB->Event);
12521307
#endif
1253-
if (auto Err =
1254-
HR::toError(IS.CB->Fence->SetEventOnCompletion(CurrentCounter, Event),
1255-
"Failed to register end event."))
1308+
if (auto Err = HR::toError(
1309+
IS.CB->Fence->SetEventOnCompletion(CurrentCounter, Event),
1310+
"Failed to register end event."))
12561311
return Err;
12571312

12581313
#ifdef _WIN32
@@ -1272,14 +1327,7 @@ class DXDevice : public offloadtest::Device {
12721327
}
12731328

12741329
llvm::Error executeCommandList(InvocationState &IS) {
1275-
if (auto Err =
1276-
HR::toError(IS.CB->CmdList->Close(), "Failed to close command list."))
1277-
return Err;
1278-
1279-
ID3D12CommandList *const CmdLists[] = {IS.CB->CmdList.Get()};
1280-
GraphicsQueue.Queue->ExecuteCommandLists(1, CmdLists);
1281-
1282-
return waitForSignal(IS);
1330+
return GraphicsQueue.submit(std::move(IS.CB));
12831331
}
12841332

12851333
llvm::Error createComputeCommands(Pipeline &P, InvocationState &IS) {

lib/API/MTL/MTLDevice.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,18 @@ static MTL::VertexFormat getMTLVertexFormat(DataFormat Format, int Channels) {
7676
namespace {
7777
class MTLQueue : public offloadtest::Queue {
7878
public:
79+
using Queue::submit;
80+
7981
MTL::CommandQueue *Queue;
8082
MTLQueue(MTL::CommandQueue *Queue) : Queue(Queue) {}
81-
~MTLQueue() {
83+
~MTLQueue() override {
8284
if (Queue)
8385
Queue->release();
8486
}
87+
88+
llvm::Error submit(
89+
llvm::SmallVectorImpl<std::unique_ptr<offloadtest::CommandBuffer>> &CBs)
90+
override;
8591
};
8692

8793
class MTLBuffer : public offloadtest::Buffer {
@@ -177,6 +183,20 @@ class MTLComputeEncoder : public offloadtest::ComputeEncoder {
177183
}
178184
};
179185

186+
/// Commit every command buffer in \p CBs and block until each has completed.
///
/// Buffers are processed one at a time, in order: commit, wait for completion,
/// then check the buffer's error state.
///
/// \param CBs Command buffers to submit; each must be an MTLCommandBuffer.
/// \returns The first execution error reported by a command buffer (later
///          buffers are then not committed), or success.
llvm::Error MTLQueue::submit(
    llvm::SmallVectorImpl<std::unique_ptr<offloadtest::CommandBuffer>> &CBs) {
  for (auto &Entry : CBs) {
    auto &Native = Entry->as<MTLCommandBuffer>();

    Native.CmdBuffer->commit();
    Native.CmdBuffer->waitUntilCompleted();

    // Surface any failure recorded on the command buffer during execution.
    if (NS::Error *Failure = Native.CmdBuffer->error())
      return toError(Failure);
  }
  return llvm::Error::success();
}
199+
180200
llvm::Expected<std::unique_ptr<offloadtest::ComputeEncoder>>
181201
MTLCommandBuffer::createComputeEncoder(EncoderMode Mode) {
182202
MTL::ComputeCommandEncoder *NativeEncoder =
@@ -571,15 +591,7 @@ class MTLDevice : public offloadtest::Device {
571591
}
572592

573593
llvm::Error executeCommands(InvocationState &IS) {
574-
IS.CB->CmdBuffer->commit();
575-
IS.CB->CmdBuffer->waitUntilCompleted();
576-
577-
// Check and surface any errors that occurred during execution.
578-
NS::Error *CBErr = IS.CB->CmdBuffer->error();
579-
if (CBErr)
580-
return toError(CBErr);
581-
582-
return llvm::Error::success();
594+
return GraphicsQueue.submit(std::move(IS.CB));
583595
}
584596

585597
llvm::Error copyBack(Pipeline &P, InvocationState &IS) {

lib/API/VK/Device.cpp

Lines changed: 47 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,18 @@ class VulkanBuffer : public offloadtest::Buffer {
359359

360360
class VulkanQueue : public offloadtest::Queue {
361361
public:
362+
using Queue::submit;
363+
362364
VkQueue Queue = VK_NULL_HANDLE;
363365
uint32_t QueueFamilyIdx = 0;
364-
VulkanQueue(VkQueue Q, uint32_t QueueFamilyIdx)
365-
: Queue(Q), QueueFamilyIdx(QueueFamilyIdx) {}
366+
// TODO: Ensure device lifetime is managed (e.g. via shared_ptr).
367+
VkDevice Device = VK_NULL_HANDLE;
368+
VulkanQueue(VkQueue Q, uint32_t QueueFamilyIdx, VkDevice Device)
369+
: Queue(Q), QueueFamilyIdx(QueueFamilyIdx), Device(Device) {}
370+
371+
llvm::Error submit(
372+
llvm::SmallVectorImpl<std::unique_ptr<offloadtest::CommandBuffer>> &CBs)
373+
override;
366374
};
367375

368376
class VulkanCommandBuffer : public offloadtest::CommandBuffer {
@@ -491,6 +499,38 @@ VulkanCommandBuffer::createComputeEncoder(offloadtest::EncoderMode Mode) {
491499
return std::make_unique<VKComputeEncoder>(*this, Mode);
492500
}
493501

502+
/// End, submit and wait for every command buffer in \p CBs, in order.
///
/// Each command buffer gets its own temporary fence: recording is ended, the
/// buffer is submitted to the queue with that fence, and the call blocks until
/// the fence is signalled. The fence is destroyed on every path once it has
/// been created.
///
/// \param CBs Command buffers to submit; each must be a VulkanCommandBuffer.
/// \returns The first error encountered (later buffers are then not
///          submitted), or success once every buffer has completed.
llvm::Error VulkanQueue::submit(
    llvm::SmallVectorImpl<std::unique_ptr<offloadtest::CommandBuffer>> &CBs) {
  for (auto &CB : CBs) {
    auto &VCB = CB->as<VulkanCommandBuffer>();
    if (vkEndCommandBuffer(VCB.CmdBuffer))
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Could not end command buffer.");

    VkSubmitInfo SubmitInfo = {};
    SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    SubmitInfo.commandBufferCount = 1;
    SubmitInfo.pCommandBuffers = &VCB.CmdBuffer;

    VkFenceCreateInfo FenceInfo = {};
    FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
    VkFence Fence;
    if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence))
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Could not create fence.");

    // From here on the fence exists, so every exit must destroy it; the
    // error paths previously returned without doing so and leaked the fence.
    if (vkQueueSubmit(Queue, 1, &SubmitInfo, Fence)) {
      // The submission was rejected, so the fence is not in use by the queue.
      vkDestroyFence(Device, Fence, nullptr);
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Failed to submit to queue.");
    }
    if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX)) {
      // NOTE(review): with an effectively infinite timeout a failure here is
      // presumably a device-level error, after which destroying device
      // objects remains valid -- confirm against the Vulkan specification.
      vkDestroyFence(Device, Fence, nullptr);
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Failed waiting for fence.");
    }

    vkDestroyFence(Device, Fence, nullptr);
  }
  return llvm::Error::success();
}
533+
494534
class VulkanDevice : public offloadtest::Device {
495535
private:
496536
VkPhysicalDevice PhysicalDevice;
@@ -685,7 +725,8 @@ class VulkanDevice : public offloadtest::Device {
685725
VkQueue DeviceQueue = VK_NULL_HANDLE;
686726
vkGetDeviceQueue(Device, QueueFamilyIdx, 0, &DeviceQueue);
687727

688-
const VulkanQueue GraphicsQueue = VulkanQueue(DeviceQueue, QueueFamilyIdx);
728+
const VulkanQueue GraphicsQueue =
729+
VulkanQueue(DeviceQueue, QueueFamilyIdx, Device);
689730

690731
return std::make_shared<VulkanDevice>(PhysicalDevice, Props, Device,
691732
std::move(GraphicsQueue),
@@ -1208,34 +1249,8 @@ class VulkanDevice : public offloadtest::Device {
12081249
return llvm::Error::success();
12091250
}
12101251

1211-
llvm::Error executeCommandBuffer(InvocationState &IS,
1212-
VkPipelineStageFlags WaitMask = 0) {
1213-
if (vkEndCommandBuffer(IS.CB->CmdBuffer))
1214-
return llvm::createStringError(std::errc::device_or_resource_busy,
1215-
"Could not end command buffer.");
1216-
1217-
VkSubmitInfo SubmitInfo = {};
1218-
SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
1219-
SubmitInfo.commandBufferCount = 1;
1220-
SubmitInfo.pCommandBuffers = &IS.CB->CmdBuffer;
1221-
SubmitInfo.pWaitDstStageMask = &WaitMask;
1222-
VkFenceCreateInfo FenceInfo = {};
1223-
FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
1224-
VkFence Fence;
1225-
if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence))
1226-
return llvm::createStringError(std::errc::device_or_resource_busy,
1227-
"Could not create fence.");
1228-
1229-
// Submit to the queue
1230-
if (vkQueueSubmit(GraphicsQueue.Queue, 1, &SubmitInfo, Fence))
1231-
return llvm::createStringError(std::errc::device_or_resource_busy,
1232-
"Failed to submit to queue.");
1233-
if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX))
1234-
return llvm::createStringError(std::errc::device_or_resource_busy,
1235-
"Failed waiting for fence.");
1236-
1237-
vkDestroyFence(Device, Fence, nullptr);
1238-
return llvm::Error::success();
1252+
llvm::Error executeCommandBuffer(InvocationState &IS) {
1253+
return GraphicsQueue.submit(std::move(IS.CB));
12391254
}
12401255

12411256
llvm::Error createDescriptorPool(Pipeline &P, InvocationState &IS) {
@@ -2414,7 +2429,7 @@ class VulkanDevice : public offloadtest::Device {
24142429
if (auto Err = createCommands(P, State))
24152430
return Err;
24162431
llvm::outs() << "Commands created.\n";
2417-
if (auto Err = executeCommandBuffer(State, VK_PIPELINE_STAGE_TRANSFER_BIT))
2432+
if (auto Err = executeCommandBuffer(State))
24182433
return Err;
24192434
llvm::outs() << "Executed compute command buffer.\n";
24202435
if (auto Err = readBackData(P, State))

0 commit comments

Comments
 (0)