Skip to content

Commit 7caa51c

Browse files
MarijnS95 and claude committed
Add Queue::submit() for command buffer submission
Move command buffer submission logic from each backend's Device into Queue::submit(), which takes ownership of the command buffers. For now it blocks internally until completion; a TODO marks that it will return a Fence once the Fence abstraction from PR llvm#1007 is available.

- Metal: commit() + waitUntilCompleted()
- Vulkan: vkEndCommandBuffer() + vkQueueSubmit() with temporary fence + vkWaitForFences()
- DX12: CmdList::Close() + ExecuteCommandLists() + Queue::Signal()/Fence::SetEventOnCompletion() wait

VulkanQueue now stores a VkDevice handle (with a TODO for lifetime management) so it can create/destroy fences independently.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 095fc23 commit 7caa51c

File tree

4 files changed

+142
-57
lines changed

4 files changed

+142
-57
lines changed

include/API/Device.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,19 @@ class Queue {
6262
public:
6363
virtual ~Queue() = 0;
6464

65+
/// Submit command buffers for execution and block until completion.
66+
// TODO: Return a Fence instead of blocking, once the Fence abstraction
67+
// from PR #1007 is available.
68+
virtual llvm::Error
69+
submit(llvm::SmallVector<std::unique_ptr<CommandBuffer>> CBs) = 0;
70+
71+
/// Convenience overload for submitting a single command buffer.
72+
llvm::Error submit(std::unique_ptr<CommandBuffer> CB) {
73+
llvm::SmallVector<std::unique_ptr<CommandBuffer>> CBs;
74+
CBs.push_back(std::move(CB));
75+
return submit(std::move(CBs));
76+
}
77+
6578
protected:
6679
Queue() = default;
6780
};

lib/API/DX/Device.cpp

Lines changed: 62 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -295,22 +295,28 @@ class DXBuffer : public offloadtest::Buffer {
295295

296296
class DXQueue : public offloadtest::Queue {
297297
public:
298+
using Queue::submit;
299+
298300
ComPtr<ID3D12CommandQueue> Queue;
299301

300302
DXQueue(ComPtr<ID3D12CommandQueue> Queue) : Queue(Queue) {}
301-
virtual ~DXQueue() {}
303+
~DXQueue() override {}
302304

303305
static llvm::Expected<DXQueue>
304306
createGraphicsQueue(ComPtr<ID3D12Device> Device) {
305307
const D3D12_COMMAND_QUEUE_DESC Desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, 0,
306308
D3D12_COMMAND_QUEUE_FLAG_NONE, 0};
307-
ComPtr<ID3D12CommandQueue> Queue;
308-
if (auto Err =
309-
HR::toError(Device->CreateCommandQueue(&Desc, IID_PPV_ARGS(&Queue)),
310-
"Failed to create command queue."))
309+
ComPtr<ID3D12CommandQueue> CmdQueue;
310+
if (auto Err = HR::toError(
311+
Device->CreateCommandQueue(&Desc, IID_PPV_ARGS(&CmdQueue)),
312+
"Failed to create command queue."))
311313
return Err;
312-
return DXQueue(Queue);
314+
return DXQueue(CmdQueue);
313315
}
316+
317+
llvm::Error
318+
submit(llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs)
319+
override;
314320
};
315321

316322
class DXCommandBuffer : public offloadtest::CommandBuffer {
@@ -373,6 +379,52 @@ class DXCommandBuffer : public offloadtest::CommandBuffer {
373379
DXCommandBuffer() : CommandBuffer(GPUAPI::DirectX) {}
374380
};
375381

382+
/// Closes, executes, and waits for each command buffer in submission order.
///
/// For every buffer the command list is closed and handed to the D3D12 queue,
/// then the buffer's own fence is signalled from the queue and the call blocks
/// on the buffer's event until that signal has been reached.
///
/// \param CBs Command buffers to run; ownership is taken and the buffers are
///            destroyed when this function returns.
/// \returns The first failure encountered (close, signal, event registration,
///          wait, or device removal); later buffers are not executed.
///          Success once every buffer has completed.
llvm::Error DXQueue::submit(
    llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs) {
  for (auto &CB : CBs) {
    auto &DCB = CB->as<DXCommandBuffer>();
    if (auto Err =
            HR::toError(DCB.CmdList->Close(), "Failed to close command list."))
      return Err;

    ID3D12CommandList *const CmdLists[] = {DCB.CmdList.Get()};
    Queue->ExecuteCommandLists(1, CmdLists);

    // Derive the signal value from the fence itself instead of a
    // function-local static counter. Other code (waitForSignal) signals this
    // same fence with its own independent counter; a private counter here
    // could pick a value the fence has already reached, which would make the
    // completion check below succeed immediately and return before the GPU
    // has finished. This relies on every earlier signal on this fence having
    // been waited for, which holds as long as all users block.
    const uint64_t CompletedValue = DCB.Fence->GetCompletedValue();
    // GetCompletedValue() reports UINT64_MAX once the device has been
    // removed; incrementing that would wrap around to 0.
    if (CompletedValue == UINT64_MAX)
      return llvm::createStringError(
          std::errc::no_such_device,
          "Device was removed before the command list could be signalled.");
    const uint64_t SignalValue = CompletedValue + 1;

    if (auto Err = HR::toError(Queue->Signal(DCB.Fence.Get(), SignalValue),
                               "Failed to add signal."))
      return Err;

    // Only block when the GPU has not already passed the signal.
    if (DCB.Fence->GetCompletedValue() < SignalValue) {
#ifdef _WIN32
      HANDLE Event = DCB.Event;
#else // WSL
      HANDLE Event = reinterpret_cast<HANDLE>(DCB.Event);
#endif
      if (auto Err =
              HR::toError(DCB.Fence->SetEventOnCompletion(SignalValue, Event),
                          "Failed to register end event."))
        return Err;

#ifdef _WIN32
      WaitForSingleObject(DCB.Event, INFINITE);
#else // WSL
      pollfd PollEvent;
      PollEvent.fd = DCB.Event;
      PollEvent.events = POLLIN;
      PollEvent.revents = 0;
      // A signal delivered to this thread interrupts poll() with EINTR; that
      // is not a failure of the wait, so retry instead of reporting an error.
      while (poll(&PollEvent, 1, -1) == -1) {
        if (errno == EINTR)
          continue;
        return llvm::createStringError(
            std::error_code(errno, std::system_category()), strerror(errno));
      }
#endif
    }
  }
  return llvm::Error::success();
}
376428
class DXDevice : public offloadtest::Device {
377429
private:
378430
ComPtr<IDXCoreAdapter> Adapter;
@@ -1179,8 +1231,10 @@ class DXDevice : public offloadtest::Device {
11791231
IS.CB->CmdList->ResourceBarrier(1, &Barrier);
11801232
}
11811233

1234+
// waitForSignal is used for tile mapping synchronization, not command buffer
1235+
// submission. TODO: Replace with a proper fence abstraction.
11821236
llvm::Error waitForSignal(InvocationState &IS) {
1183-
// This is a hack but it works since this is all single threaded code.
1237+
// Reuse the command buffer's fence for a quick queue-level signal/wait.
11841238
static uint64_t FenceCounter = 0;
11851239
const uint64_t CurrentCounter = FenceCounter + 1;
11861240

@@ -1217,14 +1271,7 @@ class DXDevice : public offloadtest::Device {
12171271
}
12181272

12191273
llvm::Error executeCommandList(InvocationState &IS) {
1220-
if (auto Err = HR::toError(IS.CB->CmdList->Close(),
1221-
"Failed to close command list."))
1222-
return Err;
1223-
1224-
ID3D12CommandList *const CmdLists[] = {IS.CB->CmdList.Get()};
1225-
GraphicsQueue.Queue->ExecuteCommandLists(1, CmdLists);
1226-
1227-
return waitForSignal(IS);
1274+
return GraphicsQueue.submit(std::move(IS.CB));
12281275
}
12291276

12301277
llvm::Error createComputeCommands(Pipeline &P, InvocationState &IS) {

lib/API/MTL/MTLDevice.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,18 @@ static MTL::VertexFormat getMTLVertexFormat(DataFormat Format, int Channels) {
7575
namespace {
7676
class MTLQueue : public offloadtest::Queue {
7777
public:
78+
using Queue::submit;
79+
7880
MTL::CommandQueue *Queue;
7981
MTLQueue(MTL::CommandQueue *Queue) : Queue(Queue) {}
80-
~MTLQueue() {
82+
~MTLQueue() override {
8183
if (Queue)
8284
Queue->release();
8385
}
86+
87+
llvm::Error
88+
submit(llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs)
89+
override;
8490
};
8591

8692
class MTLBuffer : public offloadtest::Buffer {
@@ -122,6 +128,19 @@ class MTLCommandBuffer : public offloadtest::CommandBuffer {
122128
MTLCommandBuffer() : CommandBuffer(GPUAPI::Metal) {}
123129
};
124130

131+
/// Commits each Metal command buffer in order and blocks until it has finished
/// executing before moving on to the next one.
///
/// Returns the first execution error reported by a command buffer; buffers
/// after a failing one are not committed. Returns success when every buffer
/// completed without an error.
llvm::Error MTLQueue::submit(
    llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs) {
  for (std::unique_ptr<offloadtest::CommandBuffer> &Pending : CBs) {
    auto &MetalCB = Pending->as<MTLCommandBuffer>();

    MetalCB.CmdBuffer->commit();
    MetalCB.CmdBuffer->waitUntilCompleted();

    // Surface any failure Metal recorded while executing this buffer.
    if (NS::Error *ExecErr = MetalCB.CmdBuffer->error())
      return toError(ExecErr);
  }
  return llvm::Error::success();
}
125144
class MTLDevice : public offloadtest::Device {
126145
Capabilities Caps;
127146
MTL::Device *Device;
@@ -506,15 +525,7 @@ class MTLDevice : public offloadtest::Device {
506525
}
507526

508527
llvm::Error executeCommands(InvocationState &IS) {
509-
IS.CB->CmdBuffer->commit();
510-
IS.CB->CmdBuffer->waitUntilCompleted();
511-
512-
// Check and surface any errors that occurred during execution.
513-
NS::Error *CBErr = IS.CB->CmdBuffer->error();
514-
if (CBErr)
515-
return toError(CBErr);
516-
517-
return llvm::Error::success();
528+
return GraphicsQueue.submit(std::move(IS.CB));
518529
}
519530

520531
llvm::Error copyBack(Pipeline &P, InvocationState &IS) {

lib/API/VK/Device.cpp

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -393,10 +393,18 @@ class VulkanBuffer : public offloadtest::Buffer {
393393

394394
class VulkanQueue : public offloadtest::Queue {
395395
public:
396+
using Queue::submit;
397+
396398
VkQueue Queue = VK_NULL_HANDLE;
397399
uint32_t QueueFamilyIdx = 0;
398-
VulkanQueue(VkQueue Q, uint32_t QueueFamilyIdx)
399-
: Queue(Q), QueueFamilyIdx(QueueFamilyIdx) {}
400+
// TODO: Ensure device lifetime is managed (e.g. via shared_ptr).
401+
VkDevice Device = VK_NULL_HANDLE;
402+
VulkanQueue(VkQueue Q, uint32_t QueueFamilyIdx, VkDevice Device)
403+
: Queue(Q), QueueFamilyIdx(QueueFamilyIdx), Device(Device) {}
404+
405+
llvm::Error
406+
submit(llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs)
407+
override;
400408
};
401409

402410
class VulkanCommandBuffer : public offloadtest::CommandBuffer {
@@ -449,6 +457,37 @@ class VulkanCommandBuffer : public offloadtest::CommandBuffer {
449457
VulkanCommandBuffer() : CommandBuffer(GPUAPI::Vulkan) {}
450458
};
451459

460+
/// Ends, submits, and waits for each command buffer in submission order.
///
/// Every buffer is submitted on its own with a temporary fence, and the call
/// blocks on that fence before moving on to the next buffer.
///
/// \param CBs Command buffers to run; ownership is taken and the buffers are
///            destroyed when this function returns.
/// \returns The first failure encountered (end, fence creation, submit, or
///          wait); later buffers are not submitted. Success once every buffer
///          has completed.
llvm::Error VulkanQueue::submit(
    llvm::SmallVector<std::unique_ptr<offloadtest::CommandBuffer>> CBs) {
  for (auto &CB : CBs) {
    auto &VCB = CB->as<VulkanCommandBuffer>();
    if (vkEndCommandBuffer(VCB.CmdBuffer))
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Could not end command buffer.");

    VkSubmitInfo SubmitInfo = {};
    SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    SubmitInfo.commandBufferCount = 1;
    SubmitInfo.pCommandBuffers = &VCB.CmdBuffer;

    VkFenceCreateInfo FenceInfo = {};
    FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
    VkFence Fence = VK_NULL_HANDLE;
    if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence))
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Could not create fence.");

    if (vkQueueSubmit(Queue, 1, &SubmitInfo, Fence)) {
      // The submission was rejected, so the queue never took a reference to
      // the fence; release it here instead of leaking it.
      vkDestroyFence(Device, Fence, nullptr);
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Failed to submit to queue.");
    }

    const VkResult WaitResult =
        vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX);
    // Release the fence whenever it can no longer be in use: after a
    // successful wait, or after device loss (destroying objects of a lost
    // device is still valid). For any other result the submission may still
    // be pending, and destroying a fence that is in use is invalid, so it is
    // deliberately left alone in that case.
    if (WaitResult == VK_SUCCESS || WaitResult == VK_ERROR_DEVICE_LOST)
      vkDestroyFence(Device, Fence, nullptr);
    if (WaitResult)
      return llvm::createStringError(std::errc::device_or_resource_busy,
                                     "Failed waiting for fence.");
  }
  return llvm::Error::success();
}
452491
class VulkanDevice : public offloadtest::Device {
453492
private:
454493
std::shared_ptr<VulkanInstance> Instance;
@@ -645,7 +684,8 @@ class VulkanDevice : public offloadtest::Device {
645684
VkQueue DeviceQueue = VK_NULL_HANDLE;
646685
vkGetDeviceQueue(Device, QueueFamilyIdx, 0, &DeviceQueue);
647686

648-
const VulkanQueue GraphicsQueue = VulkanQueue(DeviceQueue, QueueFamilyIdx);
687+
const VulkanQueue GraphicsQueue =
688+
VulkanQueue(DeviceQueue, QueueFamilyIdx, Device);
649689

650690
return std::make_unique<VulkanDevice>(Instance, PhysicalDevice, Props,
651691
Device, std::move(GraphicsQueue),
@@ -1179,34 +1219,8 @@ class VulkanDevice : public offloadtest::Device {
11791219
return llvm::Error::success();
11801220
}
11811221

1182-
llvm::Error executeCommandBuffer(InvocationState &IS,
1183-
VkPipelineStageFlags WaitMask = 0) {
1184-
if (vkEndCommandBuffer(IS.CB->CmdBuffer))
1185-
return llvm::createStringError(std::errc::device_or_resource_busy,
1186-
"Could not end command buffer.");
1187-
1188-
VkSubmitInfo SubmitInfo = {};
1189-
SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
1190-
SubmitInfo.commandBufferCount = 1;
1191-
SubmitInfo.pCommandBuffers = &IS.CB->CmdBuffer;
1192-
SubmitInfo.pWaitDstStageMask = &WaitMask;
1193-
VkFenceCreateInfo FenceInfo = {};
1194-
FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
1195-
VkFence Fence;
1196-
if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence))
1197-
return llvm::createStringError(std::errc::device_or_resource_busy,
1198-
"Could not create fence.");
1199-
1200-
// Submit to the queue
1201-
if (vkQueueSubmit(GraphicsQueue.Queue, 1, &SubmitInfo, Fence))
1202-
return llvm::createStringError(std::errc::device_or_resource_busy,
1203-
"Failed to submit to queue.");
1204-
if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX))
1205-
return llvm::createStringError(std::errc::device_or_resource_busy,
1206-
"Failed waiting for fence.");
1207-
1208-
vkDestroyFence(Device, Fence, nullptr);
1209-
return llvm::Error::success();
1222+
llvm::Error executeCommandBuffer(InvocationState &IS) {
1223+
return GraphicsQueue.submit(std::move(IS.CB));
12101224
}
12111225

12121226
llvm::Error createDescriptorPool(Pipeline &P, InvocationState &IS) {
@@ -2392,7 +2406,7 @@ class VulkanDevice : public offloadtest::Device {
23922406
if (auto Err = createCommands(P, State))
23932407
return Err;
23942408
llvm::outs() << "Commands created.\n";
2395-
if (auto Err = executeCommandBuffer(State, VK_PIPELINE_STAGE_TRANSFER_BIT))
2409+
if (auto Err = executeCommandBuffer(State))
23962410
return Err;
23972411
llvm::outs() << "Executed compute command buffer.\n";
23982412
if (auto Err = readBackData(P, State))

0 commit comments

Comments
 (0)