@@ -433,22 +433,36 @@ class DXFence : public offloadtest::Fence {
433433
434434class DXQueue : public offloadtest ::Queue {
435435public:
436+ using Queue::submit;
437+
436438 ComPtr<ID3D12CommandQueue> Queue;
439+ std::unique_ptr<DXFence> SubmitFence;
440+ uint64_t FenceCounter = 0 ;
437441
438- DXQueue (ComPtr<ID3D12CommandQueue> Queue) : Queue(Queue) {}
439- virtual ~DXQueue () {}
442+ DXQueue (ComPtr<ID3D12CommandQueue> Queue,
443+ std::unique_ptr<DXFence> SubmitFence)
444+ : Queue(Queue), SubmitFence(std::move(SubmitFence)) {}
445+ DXQueue (DXQueue &&) = default ;
446+ ~DXQueue () override {}
440447
441448 static llvm::Expected<DXQueue>
442449 createGraphicsQueue (ComPtr<ID3D12Device> Device) {
443450 const D3D12_COMMAND_QUEUE_DESC Desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, 0 ,
444451 D3D12_COMMAND_QUEUE_FLAG_NONE, 0 };
445- ComPtr<ID3D12CommandQueue> Queue ;
446- if (auto Err =
447- HR::toError ( Device->CreateCommandQueue (&Desc, IID_PPV_ARGS (&Queue )),
448- " Failed to create command queue." ))
452+ ComPtr<ID3D12CommandQueue> CmdQueue ;
453+ if (auto Err = HR::toError (
454+ Device->CreateCommandQueue (&Desc, IID_PPV_ARGS (&CmdQueue )),
455+ " Failed to create command queue." ))
449456 return Err;
450- return DXQueue (Queue);
457+ auto FenceOrErr = DXFence::create (Device.Get (), " QueueSubmitFence" );
458+ if (!FenceOrErr)
459+ return FenceOrErr.takeError ();
460+ return DXQueue (CmdQueue, std::move (*FenceOrErr));
451461 }
462+
463+ llvm::Error submit (
464+ llvm::SmallVectorImpl<std::unique_ptr<offloadtest::CommandBuffer>> &&CBs)
465+ override ;
452466};
453467
454468class DXCommandBuffer : public offloadtest ::CommandBuffer {
@@ -483,6 +497,42 @@ class DXCommandBuffer : public offloadtest::CommandBuffer {
483497 DXCommandBuffer () : CommandBuffer(GPUAPI::DirectX) {}
484498};
485499
500+ llvm::Error DXQueue::submit (
501+ llvm::SmallVectorImpl<std::unique_ptr<offloadtest::CommandBuffer>> &&CBs) {
502+ llvm::SmallVector<ID3D12CommandList *> CmdLists;
503+ CmdLists.reserve (CBs.size ());
504+
505+ // Wait on the previous submit's fence value before executing this batch,
506+ // so that back-to-back submits don't overlap on the GPU. Skip on first
507+ // submit since Wait(fence, 0) triggers a D3D12 validation warning.
508+ if (FenceCounter > 0 )
509+ if (auto Err =
510+ HR::toError (Queue->Wait (SubmitFence->Fence .Get (), FenceCounter),
511+ " Failed to wait on previous submit." ))
512+ return Err;
513+
514+ for (auto &CB : CBs) {
515+ auto &DCB = *llvm::cast<DXCommandBuffer>(CB.get ());
516+ if (auto Err =
517+ HR::toError (DCB.CmdList ->Close (), " Failed to close command list." ))
518+ return Err;
519+ CmdLists.push_back (DCB.CmdList .Get ());
520+ }
521+
522+ Queue->ExecuteCommandLists (CmdLists.size (), CmdLists.data ());
523+
524+ const uint64_t CurrentCounter = ++FenceCounter;
525+ if (auto Err =
526+ HR::toError (Queue->Signal (SubmitFence->Fence .Get (), CurrentCounter),
527+ " Failed to add signal." ))
528+ return Err;
529+
530+ // TODO: Return a Fence+value with keepalive lists instead of blocking here.
531+ if (auto Err = SubmitFence->waitForCompletion (CurrentCounter))
532+ return Err;
533+
534+ return llvm::Error::success ();
535+ }
486536class DXDevice : public offloadtest ::Device {
487537private:
488538 ComPtr<IDXCoreAdapter> Adapter;
@@ -515,7 +565,6 @@ class DXDevice : public offloadtest::Device {
515565 ComPtr<ID3D12DescriptorHeap> DescHeap;
516566 ComPtr<ID3D12PipelineState> PSO;
517567 std::unique_ptr<DXCommandBuffer> CB;
518- std::unique_ptr<offloadtest::Fence> CompletionFence;
519568
520569 // Resources for graphics pipelines.
521570 std::unique_ptr<offloadtest::Texture> RT;
@@ -530,10 +579,10 @@ class DXDevice : public offloadtest::Device {
530579public:
531580 DXDevice (ComPtr<IDXCoreAdapter> A, ComPtr<ID3D12Device> D, DXQueue Q,
532581 std::string Desc)
533- : Adapter(A), Device(D), GraphicsQueue(Q ) {
582+ : Adapter(A), Device(D), GraphicsQueue(std::move(Q) ) {
534583 Description = Desc;
535584 }
536- DXDevice (const DXDevice &) = default ;
585+ DXDevice (const DXDevice &) = delete ;
537586
538587 ~DXDevice () override {
539588 const std::lock_guard<std::mutex> Lock (SignalHandlerMutex);
@@ -698,9 +747,8 @@ class DXDevice : public offloadtest::Device {
698747 auto GraphicsQueueOrErr = DXQueue::createGraphicsQueue (Device);
699748 if (!GraphicsQueueOrErr)
700749 return GraphicsQueueOrErr.takeError ();
701- const DXQueue GraphicsQueue = *GraphicsQueueOrErr;
702-
703- return std::make_unique<DXDevice>(Adapter, Device, std::move (GraphicsQueue),
750+ return std::make_unique<DXDevice>(Adapter, Device,
751+ std::move (*GraphicsQueueOrErr),
704752 std::string (DescVec.data ()));
705753 }
706754
@@ -911,7 +959,7 @@ class DXDevice : public offloadtest::Device {
911959 return Ret;
912960 }
913961
914- llvm::Error setupReservedResource (Resource &R, InvocationState &IS,
962+ llvm::Error setupReservedResource (Resource &R,
915963 const D3D12_RESOURCE_DESC ResDesc,
916964 ComPtr<ID3D12Heap> &Heap,
917965 ComPtr<ID3D12Resource> &Buffer) {
@@ -949,7 +997,19 @@ class DXDevice : public offloadtest::Device {
949997 Buffer.Get (), 1 , &StartCoord, &RegionSize, Heap.Get (), 1 , &RangeFlag,
950998 &HeapRangeStartOffset, &RangeTileCount, D3D12_TILE_MAPPING_FLAG_NONE);
951999
952- return waitForSignal (IS);
1000+ // Synchronize after UpdateTileMappings, which is a queue operation (not
1001+ // recorded into a command list). Use a dedicated fence to avoid
1002+ // conflicting signal values with the queue's SubmitFence.
1003+ auto FenceOrErr = DXFence::create (Device.Get (), " TileMappingFence" );
1004+ if (!FenceOrErr)
1005+ return FenceOrErr.takeError ();
1006+
1007+ if (auto Err =
1008+ HR::toError (CommandQueue->Signal ((*FenceOrErr)->Fence .Get (), 1 ),
1009+ " Failed to add signal." ))
1010+ return Err;
1011+
1012+ return (*FenceOrErr)->waitForCompletion (1 );
9531013 }
9541014
9551015 llvm::Expected<ResourceBundle> createSRV (Resource &R, InvocationState &IS) {
@@ -1008,7 +1068,7 @@ class DXDevice : public offloadtest::Device {
10081068
10091069 ComPtr<ID3D12Heap> Heap; // optional, only created if NumTiles > 0
10101070 if (R.IsReserved )
1011- if (auto Err = setupReservedResource (R, IS, ResDesc, Heap, Buffer))
1071+ if (auto Err = setupReservedResource (R, ResDesc, Heap, Buffer))
10121072 return Err;
10131073
10141074 // Upload data initialization
@@ -1134,7 +1194,7 @@ class DXDevice : public offloadtest::Device {
11341194
11351195 ComPtr<ID3D12Heap> Heap; // optional, only created if NumTiles > 0
11361196 if (R.IsReserved )
1137- if (auto Err = setupReservedResource (R, IS, ResDesc, Heap, Buffer))
1197+ if (auto Err = setupReservedResource (R, ResDesc, Heap, Buffer))
11381198 return Err;
11391199
11401200 // Upload data initialization
@@ -1388,33 +1448,8 @@ class DXDevice : public offloadtest::Device {
13881448 IS.CB ->CmdList ->ResourceBarrier (1 , &Barrier);
13891449 }
13901450
1391- llvm::Error waitForSignal (InvocationState &IS) {
1392- // This is a hack but it works since this is all single threaded code.
1393- static uint64_t FenceCounter = 0 ;
1394- const uint64_t CurrentCounter = FenceCounter + 1 ;
1395- auto *F = static_cast <DXFence *>(IS.CompletionFence .get ());
1396-
1397- if (auto Err = HR::toError (
1398- GraphicsQueue.Queue ->Signal (F->Fence .Get (), CurrentCounter),
1399- " Failed to add signal." ))
1400- return Err;
1401-
1402- if (auto Err = IS.CompletionFence ->waitForCompletion (CurrentCounter))
1403- return Err;
1404-
1405- FenceCounter = CurrentCounter;
1406- return llvm::Error::success ();
1407- }
1408-
14091451 llvm::Error executeCommandList (InvocationState &IS) {
1410- if (auto Err = HR::toError (IS.CB ->CmdList ->Close (),
1411- " Failed to close command list." ))
1412- return Err;
1413-
1414- ID3D12CommandList *const CmdLists[] = {IS.CB ->CmdList .Get ()};
1415- GraphicsQueue.Queue ->ExecuteCommandLists (1 , CmdLists);
1416-
1417- return waitForSignal (IS);
1452+ return GraphicsQueue.submit (std::move (IS.CB ));
14181453 }
14191454
14201455 llvm::Error createComputeCommands (Pipeline &P, InvocationState &IS) {
@@ -1870,11 +1905,6 @@ class DXDevice : public offloadtest::Device {
18701905 State.CB = std::move (*CBOrErr);
18711906 llvm::outs () << " Command buffer created.\n " ;
18721907
1873- auto FenceOrErr = createFence (" Fence" );
1874- if (!FenceOrErr)
1875- return FenceOrErr.takeError ();
1876- State.CompletionFence = std::move (*FenceOrErr);
1877-
18781908 if (auto Err = createBuffers (P, State))
18791909 return Err;
18801910 llvm::outs () << " Buffers created.\n " ;
0 commit comments