Skip to content

Commit 1522442

Browse files
manon-traverse, MarijnS95, and claude
authored
Implement an abstract Fence type to expose waiting for command buffer completion. (#1007)
The Fence type is modeled around the DX12 Fence. The closest equivalent in Vulkan is a Timeline Semaphore. These are widely supported and included in Vulkan 1.2. Metal provides the same functionality through the SharedEvent type. The abstract interface allows us to wait for GPU work to complete by waiting on a signal value using `Fence->waitForCompletion(SignalValue);`. Signaling the value still requires downcasting because queue submission is not done via an abstract interface yet. --------- Co-authored-by: Marijn Suijten <marijn@traverseresearch.nl> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent dacdf0d commit 1522442

4 files changed

Lines changed: 244 additions & 61 deletions

File tree

include/API/Device.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,20 @@ class Buffer {
5656
Buffer() = default;
5757
};
5858

59+
class Fence {
60+
public:
61+
virtual ~Fence() = default;
62+
63+
Fence(const Fence &) = delete;
64+
Fence &operator=(const Fence &) = delete;
65+
66+
virtual uint64_t getFenceValue() = 0;
67+
virtual llvm::Error waitForCompletion(uint64_t SignalValue) = 0;
68+
69+
protected:
70+
Fence() = default;
71+
};
72+
5973
class Queue {
6074
public:
6175
virtual ~Queue() = 0;
@@ -77,6 +91,9 @@ class Device {
7791

7892
virtual Queue &getGraphicsQueue() = 0;
7993

94+
/// Creates a new fence on this device.
///
/// \param Name label handed to the backend fence implementation.
/// \returns the owned fence, or an error if the backend could not create it.
virtual llvm::Expected<std::unique_ptr<Fence>>
95+
createFence(llvm::StringRef Name) = 0;
96+
8097
virtual llvm::Expected<std::shared_ptr<Buffer>>
8198
createBuffer(std::string Name, BufferCreateDesc &Desc,
8299
size_t SizeInBytes) = 0;

lib/API/DX/Device.cpp

Lines changed: 90 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,82 @@ class DXBuffer : public offloadtest::Buffer {
293293
: Buffer(Buffer), Name(Name), Desc(Desc), SizeInBytes(SizeInBytes) {}
294294
};
295295

296+
class DXFence : public offloadtest::Fence {
297+
public:
298+
#ifdef _WIN32
299+
DXFence(ComPtr<ID3D12Fence> Fence, HANDLE Event, llvm::StringRef Name)
300+
#else // WSL
301+
DXFence(ComPtr<ID3D12Fence> Fence, int Event, llvm::StringRef Name)
302+
#endif
303+
: Name(Name), Fence(Fence), Event(Event) {
304+
}
305+
306+
std::string Name;
307+
ComPtr<ID3D12Fence> Fence;
308+
#ifdef _WIN32
309+
HANDLE Event;
310+
#else // WSL
311+
int Event;
312+
#endif
313+
314+
static llvm::Expected<std::unique_ptr<DXFence>> create(ID3D12Device *Device,
315+
llvm::StringRef Name) {
316+
ComPtr<ID3D12Fence> Fence;
317+
if (auto Err = HR::toError(
318+
Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&Fence)),
319+
"Failed to create Fence."))
320+
return Err;
321+
322+
#ifdef _WIN32
323+
HANDLE Event = CreateEventA(nullptr, false, false, nullptr);
324+
if (!Event)
325+
#else // WSL
326+
int Event = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
327+
if (Event == -1)
328+
#endif
329+
return llvm::createStringError(std::errc::device_or_resource_busy,
330+
"Failed to create event.");
331+
332+
return std::make_unique<DXFence>(Fence, Event, Name);
333+
}
334+
335+
~DXFence() {
336+
#ifdef _WIN32
337+
CloseHandle(Event);
338+
#else // WSL
339+
close(Event);
340+
#endif
341+
}
342+
343+
uint64_t getFenceValue() override { return Fence->GetCompletedValue(); }
344+
345+
llvm::Error waitForCompletion(uint64_t SignalValue) override {
346+
if (Fence->GetCompletedValue() >= SignalValue)
347+
return llvm::Error::success();
348+
349+
#ifdef _WIN32
350+
if (auto Err = HR::toError(Fence->SetEventOnCompletion(SignalValue, Event),
351+
"Failed to register end event."))
352+
return Err;
353+
WaitForSingleObject(Event, INFINITE);
354+
#else // WSL
355+
if (auto Err =
356+
HR::toError(Fence->SetEventOnCompletion(
357+
SignalValue, reinterpret_cast<HANDLE>(Event)),
358+
"Failed to register end event."))
359+
return Err;
360+
pollfd PollEvent;
361+
PollEvent.fd = Event;
362+
PollEvent.events = POLLIN;
363+
PollEvent.revents = 0;
364+
if (poll(&PollEvent, 1, -1) == -1)
365+
return llvm::createStringError(
366+
std::error_code(errno, std::system_category()), strerror(errno));
367+
#endif
368+
return llvm::Error::success();
369+
}
370+
};
371+
296372
class DXQueue : public offloadtest::Queue {
297373
public:
298374
ComPtr<ID3D12CommandQueue> Queue;
@@ -346,12 +422,7 @@ class DXDevice : public offloadtest::Device {
346422
ComPtr<ID3D12PipelineState> PSO;
347423
ComPtr<ID3D12CommandAllocator> Allocator;
348424
ComPtr<ID3D12GraphicsCommandList> CmdList;
349-
ComPtr<ID3D12Fence> Fence;
350-
#ifdef _WIN32
351-
HANDLE Event;
352-
#else // WSL
353-
int Event;
354-
#endif
425+
std::unique_ptr<offloadtest::Fence> Fence;
355426

356427
// Resources for graphics pipelines.
357428
ComPtr<ID3D12Resource> RT;
@@ -378,6 +449,11 @@ class DXDevice : public offloadtest::Device {
378449

379450
Queue &getGraphicsQueue() override { return GraphicsQueue; }
380451

452+
/// Creates a D3D12-backed fence on this device and hands it back through the
/// backend-agnostic Fence interface.
llvm::Expected<std::unique_ptr<offloadtest::Fence>>
createFence(llvm::StringRef Name) override {
  auto Created = DXFence::create(Device.Get(), Name);
  if (!Created)
    return Created.takeError();
  return std::move(*Created);
}
456+
381457
llvm::Expected<std::shared_ptr<offloadtest::Buffer>>
382458
createBuffer(std::string Name, BufferCreateDesc &Desc,
383459
size_t SizeInBytes) override {
@@ -1136,56 +1212,20 @@ class DXDevice : public offloadtest::Device {
11361212
IS.CmdList->ResourceBarrier(1, &Barrier);
11371213
}
11381214

1139-
llvm::Error createEvent(InvocationState &IS) {
1140-
if (auto Err = HR::toError(Device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
1141-
IID_PPV_ARGS(&IS.Fence)),
1142-
"Failed to create fence."))
1143-
return Err;
1144-
#ifdef _WIN32
1145-
IS.Event = CreateEventA(nullptr, false, false, nullptr);
1146-
if (!IS.Event)
1147-
#else // WSL
1148-
IS.Event = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
1149-
if (IS.Event == -1)
1150-
#endif
1151-
return llvm::createStringError(std::errc::device_or_resource_busy,
1152-
"Failed to create event.");
1153-
return llvm::Error::success();
1154-
}
1155-
11561215
llvm::Error waitForSignal(InvocationState &IS) {
11571216
// This is a hack but it works since this is all single threaded code.
11581217
static uint64_t FenceCounter = 0;
11591218
const uint64_t CurrentCounter = FenceCounter + 1;
1219+
auto *F = static_cast<DXFence *>(IS.Fence.get());
11601220

11611221
if (auto Err = HR::toError(
1162-
GraphicsQueue.Queue->Signal(IS.Fence.Get(), CurrentCounter),
1222+
GraphicsQueue.Queue->Signal(F->Fence.Get(), CurrentCounter),
11631223
"Failed to add signal."))
11641224
return Err;
11651225

1166-
if (IS.Fence->GetCompletedValue() < CurrentCounter) {
1167-
#ifdef _WIN32
1168-
HANDLE Event = IS.Event;
1169-
#else // WSL
1170-
HANDLE Event = reinterpret_cast<HANDLE>(IS.Event);
1171-
#endif
1172-
if (auto Err =
1173-
HR::toError(IS.Fence->SetEventOnCompletion(CurrentCounter, Event),
1174-
"Failed to register end event."))
1175-
return Err;
1226+
if (auto Err = IS.Fence->waitForCompletion(CurrentCounter))
1227+
return Err;
11761228

1177-
#ifdef _WIN32
1178-
WaitForSingleObject(IS.Event, INFINITE);
1179-
#else // WSL
1180-
pollfd PollEvent;
1181-
PollEvent.fd = IS.Event;
1182-
PollEvent.events = POLLIN;
1183-
PollEvent.revents = 0;
1184-
if (poll(&PollEvent, 1, -1) == -1)
1185-
return llvm::createStringError(
1186-
std::error_code(errno, std::system_category()), strerror(errno));
1187-
#endif
1188-
}
11891229
FenceCounter = CurrentCounter;
11901230
return llvm::Error::success();
11911231
}
@@ -1690,9 +1730,10 @@ class DXDevice : public offloadtest::Device {
16901730
return Err;
16911731
llvm::outs() << "Command structures created.\n";
16921732

1693-
if (auto Err = createEvent(State))
1694-
return Err;
1695-
llvm::outs() << "Event prepared.\n";
1733+
auto FenceOrErr = createFence("Fence");
1734+
if (!FenceOrErr)
1735+
return FenceOrErr.takeError();
1736+
State.Fence = std::move(*FenceOrErr);
16961737

16971738
if (auto Err = createBuffers(P, State))
16981739
return Err;

lib/API/MTL/MTLDevice.cpp

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,37 @@ class MTLQueue : public offloadtest::Queue {
8383
}
8484
};
8585

86+
class MTLFence : public offloadtest::Fence {
87+
public:
88+
MTLFence(MTL::SharedEvent *Event, llvm::StringRef Name)
89+
: Name(Name), Event(Event) {}
90+
std::string Name;
91+
MTL::SharedEvent *Event;
92+
93+
static llvm::Expected<std::unique_ptr<MTLFence>>
94+
create(MTL::Device *Device, llvm::StringRef Name) {
95+
MTL::SharedEvent *Event = Device->newSharedEvent();
96+
if (!Event)
97+
return llvm::createStringError(std::errc::device_or_resource_busy,
98+
"Failed to create shared event.");
99+
return std::make_unique<MTLFence>(Event, Name);
100+
}
101+
102+
~MTLFence() {
103+
if (Event)
104+
Event->release();
105+
}
106+
107+
uint64_t getFenceValue() override { return Event->signaledValue(); }
108+
109+
llvm::Error waitForCompletion(uint64_t SignalValue) override {
110+
if (!Event->waitUntilSignaledValue(SignalValue, UINT64_MAX))
111+
return llvm::createStringError(std::errc::timed_out,
112+
"Timed out waiting on shared event.");
113+
return llvm::Error::success();
114+
}
115+
};
116+
86117
class MTLBuffer : public offloadtest::Buffer {
87118
public:
88119
MTL::Buffer *Buf;
@@ -130,6 +161,7 @@ class MTLDevice : public offloadtest::Device {
130161
llvm::SmallVector<MTL::Buffer *> Buffers;
131162
MTL::Texture *FrameBufferTexture = nullptr;
132163
MTL::CommandBuffer *CmdBuffer = nullptr;
164+
std::unique_ptr<offloadtest::Fence> Fence;
133165
};
134166

135167
llvm::Error setupVertexShader(InvocationState &IS, const Pipeline &P,
@@ -488,14 +520,23 @@ class MTLDevice : public offloadtest::Device {
488520
}
489521

490522
llvm::Error executeCommands(InvocationState &IS) {
523+
// This is a hack but it works since this is all single threaded code.
524+
static uint64_t FenceCounter = 0;
525+
const uint64_t CurrentCounter = FenceCounter + 1;
526+
auto *F = static_cast<MTLFence *>(IS.Fence.get());
527+
528+
IS.CmdBuffer->encodeSignalEvent(F->Event, CurrentCounter);
491529
IS.CmdBuffer->commit();
492-
IS.CmdBuffer->waitUntilCompleted();
530+
531+
if (auto Err = IS.Fence->waitForCompletion(CurrentCounter))
532+
return Err;
493533

494534
// Check and surface any errors that occurred during execution.
495535
NS::Error *CBErr = IS.CmdBuffer->error();
496536
if (CBErr)
497537
return toError(CBErr);
498538

539+
FenceCounter = CurrentCounter;
499540
return llvm::Error::success();
500541
}
501542

@@ -565,6 +606,11 @@ class MTLDevice : public offloadtest::Device {
565606

566607
Queue &getGraphicsQueue() override { return GraphicsQueue; }
567608

609+
/// Creates a Metal-backed fence on this device and hands it back through the
/// backend-agnostic Fence interface.
llvm::Expected<std::unique_ptr<offloadtest::Fence>>
createFence(llvm::StringRef Name) override {
  auto Created = MTLFence::create(Device, Name);
  if (!Created)
    return Created.takeError();
  return std::move(*Created);
}
613+
568614
llvm::Expected<std::shared_ptr<offloadtest::Buffer>>
569615
createBuffer(std::string Name, BufferCreateDesc &Desc,
570616
size_t SizeInBytes) override {
@@ -589,6 +635,11 @@ class MTLDevice : public offloadtest::Device {
589635
llvm::Error executeProgram(Pipeline &P) override {
590636
InvocationState IS;
591637

638+
auto FenceOrErr = createFence("Fence");
639+
if (!FenceOrErr)
640+
return FenceOrErr.takeError();
641+
IS.Fence = std::move(*FenceOrErr);
642+
592643
if (auto Err = createBuffers(P, IS))
593644
return Err;
594645

0 commit comments

Comments
 (0)