Skip to content

Commit 1a889f9

Browse files
Complete GPU upload manager implementation
1 parent 99890f1 commit 1a889f9

6 files changed

Lines changed: 244 additions & 33 deletions

File tree

Graphics/GraphicsTools/include/GPUUploadManagerImpl.hpp

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ class GPUUploadManagerImpl final : public ObjectBase<IGPUUploadManager>
5353

5454
virtual void DILIGENT_CALL_TYPE RenderThreadUpdate(IDeviceContext* pContext) override final;
5555

56-
virtual void DILIGENT_CALL_TYPE ScheduleBufferUpdate(IBuffer* pDstBuffer,
56+
virtual void DILIGENT_CALL_TYPE ScheduleBufferUpdate(IDeviceContext* pContext,
57+
IBuffer* pDstBuffer,
5758
Uint32 DstOffset,
5859
Uint32 NumBytes,
5960
const void* pSrcData,
@@ -140,14 +141,16 @@ class GPUUploadManagerImpl final : public ObjectBase<IGPUUploadManager>
140141
Uint64 GetFenceValue() const { return m_FenceValue; }
141142
Uint32 GetSize() const { return m_Size; }
142143

143-
// Returns the number of pending operations. This is used for testing and debugging purposes.
144-
size_t DbgGetNumPendingOps() const { return m_NumPendingOps.load(std::memory_order_relaxed); }
144+
// Returns the number of pending operations.
145+
size_t GetNumPendingOps() const { return m_NumPendingOps.load(std::memory_order_acquire); }
145146

147+
#ifdef DILIGENT_DEBUG
146148
// Returns the number of active writers. This is used for testing and debugging purposes.
147-
Uint32 DbgGetWriterCount() const { return m_State.load(std::memory_order_relaxed) & WRITER_MASK; }
149+
Uint32 DbgGetWriterCount() const { return m_State.load(std::memory_order_acquire) & WRITER_MASK; }
148150

149151
// Returns true if the page is sealed for new writes. This is used for testing and debugging purposes.
150-
bool DbgIsSealed() const { return (m_State.load(std::memory_order_relaxed) & SEALED_BIT) != 0; }
152+
bool DbgIsSealed() const { return (m_State.load(std::memory_order_acquire) & SEALED_BIT) != 0; }
153+
#endif
151154

152155
void ReleaseStagingBuffer(IDeviceContext* pContext);
153156

@@ -202,6 +205,9 @@ class GPUUploadManagerImpl final : public ObjectBase<IGPUUploadManager>
202205
private:
203206
void ReclaimCompletedPages(IDeviceContext* pContext);
204207
bool SealAndSwapCurrentPage(IDeviceContext* pContext);
208+
void UpdateFreePages(IDeviceContext* pContext);
209+
void ProcessPendingPages(IDeviceContext* pContext);
210+
bool TryRotatePage(IDeviceContext* pContext, Page* ExpectedCurrent);
205211
bool TryEnqueuePage(Page* P);
206212
Page* AcquireFreePage(IDeviceContext* pContext);
207213
Page* CreatePage(IDeviceContext* pContext, Uint32 MinSize = 0);
@@ -216,8 +222,21 @@ class GPUUploadManagerImpl final : public ObjectBase<IGPUUploadManager>
216222
MPSCQueue<Page*> m_PendingPages;
217223

218224
// Pages that are ready to be used for writing. They are already mapped.
219-
std::mutex m_FreePagesMtx;
220-
std::vector<Page*> m_FreePages;
225+
// Container of free pages. The page list is guarded by a mutex; the element count is
// mirrored in an atomic so that Size() can be read without taking the mutex.
class FreePages
226+
{
227+
public:
228+
// Appends NumPages page pointers from the ppPages array to the list.
// Does nothing when NumPages is zero.
void Push(Page** ppPages, size_t NumPages);
229+
// Convenience overload that appends a single page.
void Push(Page* pPage) { Push(&pPage, 1); }
230+
// Removes and returns the first page in the list whose size is at least MinSize,
// or nullptr if no such page exists.
Page* Pop(Uint32 MinSize = 0);
231+
// Returns the cached page count. The load is relaxed and takes no lock, so the
// value may already be out of date by the time the caller uses it.
size_t Size() const { return m_Size.load(std::memory_order_relaxed); }
232+
233+
private:
234+
// Guards m_Pages.
std::mutex m_PagesMtx;
235+
std::vector<Page*> m_Pages;
236+
// Copy of m_Pages.size(), refreshed by Push() and Pop() while the mutex is held.
std::atomic<size_t> m_Size{0};
237+
};
238+
FreePages m_FreePages;
239+
221240
std::vector<Page*> m_NewFreePages;
222241

223242
// Pages that have been submitted for execution and are being processed by the GPU.

Graphics/GraphicsTools/interface/GPUUploadManager.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,16 @@ DILIGENT_BEGIN_INTERFACE(IGPUUploadManager, IObject)
6868

6969
/// Schedules an asynchronous buffer update operation.
7070
///
71+
/// \param [in] pContext - If calling the method from the render thread, a pointer to the device context.
72+
/// If calling the method from a worker thread, this parameter must be null.
7173
/// \param [in] pDstBuffer - Pointer to the destination buffer.
7274
/// \param [in] DstOffset - Offset in the destination buffer.
7375
/// \param [in] NumBytes - Number of bytes to copy.
7476
/// \param [in] pSrcData - Pointer to the source data.
7577
/// \param [in] Callback - Optional callback to be called when the GPU copy operation is scheduled for execution.
7678
/// \param [in] pCallbackData - Optional pointer to user data that will be passed to the callback.
7779
VIRTUAL void METHOD(ScheduleBufferUpdate)(THIS_
80+
IDeviceContext* pContext,
7881
IBuffer* pDstBuffer,
7982
Uint32 DstOffset,
8083
Uint32 NumBytes,

Graphics/GraphicsTools/src/GPUUploadManagerImpl.cpp

Lines changed: 145 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ GPUUploadManagerImpl::Page::Page(IRenderDevice* pDevice,
110110
pDevice->CreateBuffer(Desc, nullptr, &m_pStagingBuffer);
111111
VERIFY_EXPR(m_pStagingBuffer != nullptr);
112112

113-
pContext->MapBuffer(m_pStagingBuffer, MAP_WRITE, MAP_FLAG_NONE, m_pData);
113+
pContext->MapBuffer(m_pStagingBuffer, MAP_WRITE, MAP_FLAG_DO_NOT_WAIT, m_pData);
114114
VERIFY_EXPR(m_pData != nullptr);
115115
}
116116

@@ -270,7 +270,7 @@ void GPUUploadManagerImpl::Page::Reset(IDeviceContext* pContext)
270270
{
271271
if (!m_PersistentMapped)
272272
{
273-
pContext->MapBuffer(m_pStagingBuffer, MAP_WRITE, MAP_FLAG_NONE, m_pData);
273+
pContext->MapBuffer(m_pStagingBuffer, MAP_WRITE, MAP_FLAG_DO_NOT_WAIT, m_pData);
274274
}
275275
VERIFY_EXPR(m_pData != nullptr);
276276
}
@@ -296,6 +296,34 @@ void GPUUploadManagerImpl::Page::ReleaseStagingBuffer(IDeviceContext* pContext)
296296
m_pStagingBuffer.Release();
297297
}
298298

299+
// Appends the NumPages page pointers stored at ppPages to the free list and
// refreshes the cached element count. An empty batch is ignored without locking.
void GPUUploadManagerImpl::FreePages::Push(Page** ppPages, size_t NumPages)
{
    if (NumPages != 0)
    {
        Page** const ppEnd = ppPages + NumPages;

        std::lock_guard<std::mutex> Lock{m_PagesMtx};
        m_Pages.insert(m_Pages.end(), ppPages, ppEnd);
        // The counter is only ever written under the mutex; readers use it as a hint.
        m_Size.store(m_Pages.size(), std::memory_order_relaxed);
    }
}
308+
309+
// Removes and returns the first free page whose size is at least MinSize.
// Returns nullptr when the list holds no page that large.
GPUUploadManagerImpl::Page* GPUUploadManagerImpl::FreePages::Pop(Uint32 MinSize)
{
    std::lock_guard<std::mutex> Lock{m_PagesMtx};

    for (auto PageIt = m_Pages.begin(); PageIt != m_Pages.end(); ++PageIt)
    {
        Page* const pCandidate = *PageIt;
        if (pCandidate->GetSize() < MinSize)
            continue; // Too small for the request, keep scanning

        m_Pages.erase(PageIt);
        // Keep the cached count in step with the list while the mutex is still held.
        m_Size.store(m_Pages.size(), std::memory_order_relaxed);
        return pCandidate;
    }

    return nullptr;
}
299327

300328
GPUUploadManagerImpl::GPUUploadManagerImpl(IReferenceCounters* pRefCounters, const GPUUploadManagerCreateInfo& CI) :
301329
TBase{pRefCounters},
@@ -325,19 +353,66 @@ void GPUUploadManagerImpl::RenderThreadUpdate(IDeviceContext* pContext)
325353
DEV_CHECK_ERR(pContext == m_pContext, "The context passed to RenderThreadUpdate must be the same as the one used to create the GPUUploadManagerImpl");
326354

327355
SealAndSwapCurrentPage(pContext);
328-
329356
ReclaimCompletedPages(pContext);
357+
UpdateFreePages(pContext);
358+
ProcessPendingPages(pContext);
330359

331-
m_pFence->Signal(m_NextFenceValue++);
360+
pContext->EnqueueSignal(m_pFence, m_NextFenceValue++);
332361
}
333362

334-
void GPUUploadManagerImpl::ScheduleBufferUpdate(IBuffer* pDstBuffer,
363+
// Schedules a copy of NumBytes bytes from pSrcData into pDstBuffer at DstOffset.
//
// The method loops until a writer obtained from the current page accepts the update.
// Whenever the current page cannot be written to, or rejects the update, the request
// size is recorded once in the pending-size counters and a page rotation is attempted
// before retrying.
//
// pContext is forwarded to TryRotatePage(); the interface documentation requires it to
// be null when the method is called from a worker thread.
void GPUUploadManagerImpl::ScheduleBufferUpdate(IDeviceContext* pContext,
364+
IBuffer* pDstBuffer,
335365
Uint32 DstOffset,
336366
Uint32 NumBytes,
337367
const void* pSrcData,
338368
GPUUploadEnqueuedCallbackType Callback,
339369
void* pCallbackData)
370+
340371
{
372+
// True until the first failed attempt; used to account NumBytes in the pending
// counters exactly once per call.
bool IsFirstAttempt = true;
373+
374+
// Failure path shared by both retry sites below: records the request size on the
// first failure, then tries to replace page P with a fresh one.
auto UpdatePendingSizeAndTryRotate = [&](Page* P) {
375+
if (IsFirstAttempt)
376+
{
377+
// Atomically update the max pending update size to ensure the next page is large enough
378+
AtomicMax(m_MaxPendingUpdateSize, NumBytes, std::memory_order_relaxed);
379+
m_TotalPendingUpdateSize.fetch_add(NumBytes, std::memory_order_relaxed);
380+
IsFirstAttempt = false;
381+
}
382+
// TryRotatePage() returns false only when no replacement page could be acquired;
// give other threads a chance to run before the next attempt.
if (!TryRotatePage(pContext, P))
383+
{
384+
std::this_thread::yield();
385+
}
386+
};
387+
388+
while (true)
389+
{
390+
// NOTE(review): P is dereferenced without a null check, whereas TryRotatePage()
// guards ExpectedCurrent against null - confirm m_pCurrentPage is never null here.
Page* P = m_pCurrentPage.load(std::memory_order_acquire);
391+
Page::Writer Writer = P->TryBeginWriting();
392+
// A writer that tests false means writing to P could not begin
// (presumably because the page is sealed - TODO confirm).
if (!Writer)
393+
{
394+
UpdatePendingSizeAndTryRotate(P);
395+
continue;
396+
}
397+
398+
const bool UpdateScheduled = Writer.ScheduleBufferUpdate(pDstBuffer, DstOffset, NumBytes, pSrcData, Callback, pCallbackData);
399+
// EndWriting() is called whether or not the update was accepted.
if (Writer.EndWriting() == Page::WritingStatus::LastWriterSealed)
400+
{
401+
// We were the last writer
402+
TryEnqueuePage(P);
403+
}
404+
405+
if (UpdateScheduled)
406+
{
407+
// Undo the pending-size contribution made by an earlier failed attempt.
// NOTE(review): UpdateFreePages() resets m_TotalPendingUpdateSize with exchange(0);
// if that runs between the fetch_add above and this fetch_sub, the unsigned
// counter would wrap - confirm this cannot happen or is handled.
if (!IsFirstAttempt)
408+
m_TotalPendingUpdateSize.fetch_sub(NumBytes, std::memory_order_relaxed);
409+
break;
410+
}
411+
else
412+
{
413+
// The page rejected the update (presumably not enough space left - TODO confirm);
// rotate and retry.
UpdatePendingSizeAndTryRotate(P);
414+
}
415+
}
341416
}
342417

343418
GPUUploadManagerImpl::Page* GPUUploadManagerImpl::CreatePage(IDeviceContext* pContext, Uint32 MinSize)
@@ -374,11 +449,42 @@ bool GPUUploadManagerImpl::SealAndSwapCurrentPage(IDeviceContext* pContext)
374449
return true;
375450
}
376451

452+
// Attempts to replace ExpectedCurrent with a fresh page as the current page.
//
// Returns false only when no fresh page could be acquired. Returns true both when this
// call installed the new page and when m_pCurrentPage no longer held ExpectedCurrent
// (the compare-exchange failed), in which case the acquired page goes back to the free list.
bool GPUUploadManagerImpl::TryRotatePage(IDeviceContext* pContext, Page* ExpectedCurrent)
453+
{
454+
// Grab a free page. AcquireFreePage() only creates a new page when pContext is
// non-null, so callers that pass a null context can only reuse an existing free page.
455+
Page* Fresh = AcquireFreePage(pContext);
456+
if (!Fresh)
457+
return false;
458+
459+
// compare_exchange_strong overwrites its first argument on failure, so work on a copy
// and keep ExpectedCurrent intact for the sealing step below.
Page* Cur = ExpectedCurrent;
460+
if (!m_pCurrentPage.compare_exchange_strong(Cur, Fresh, std::memory_order_acq_rel))
461+
{
462+
// Lost the race: put Fresh back
463+
m_FreePages.Push(Fresh);
464+
return true; // Rotation happened by someone else
465+
}
466+
467+
// We won: seal and enqueue if no writers.
// Note that the new page is published before the old one is sealed.
468+
if (ExpectedCurrent && ExpectedCurrent->TrySeal() == Page::SealStatus::Ready)
469+
TryEnqueuePage(ExpectedCurrent);
470+
471+
return true;
472+
}
473+
377474
bool GPUUploadManagerImpl::TryEnqueuePage(Page* P)
378475
{
476+
VERIFY_EXPR(P->DbgIsSealed());
379477
if (P->TryEnqueue())
380478
{
381-
m_PendingPages.Enqueue(P);
479+
if (P->GetNumPendingOps() > 0)
480+
{
481+
m_PendingPages.Enqueue(P);
482+
}
483+
else
484+
{
485+
P->Reset(nullptr);
486+
m_FreePages.Push(P);
487+
}
382488
return true;
383489
}
384490
return false;
@@ -407,31 +513,49 @@ void GPUUploadManagerImpl::ReclaimCompletedPages(IDeviceContext* pContext)
407513
m_InFlightPages.swap(m_TmpInFlightPages);
408514
m_TmpInFlightPages.clear();
409515

410-
{
411-
std::lock_guard<std::mutex> Guard{m_FreePagesMtx};
412-
m_FreePages.insert(m_FreePages.end(), m_NewFreePages.begin(), m_NewFreePages.end());
413-
}
516+
m_FreePages.Push(m_NewFreePages.data(), m_NewFreePages.size());
414517
m_NewFreePages.clear();
415518
}
416519

417-
GPUUploadManagerImpl::Page* GPUUploadManagerImpl::AcquireFreePage(IDeviceContext* pContext)
520+
void GPUUploadManagerImpl::UpdateFreePages(IDeviceContext* pContext)
418521
{
419-
Uint32 MaxPendingUpdateSize = m_MaxPendingUpdateSize.load(std::memory_order_relaxed);
522+
VERIFY_EXPR(pContext != nullptr);
420523

421-
Page* P = nullptr;
524+
const Uint32 TotalPendingSize = m_TotalPendingUpdateSize.exchange(0, std::memory_order_relaxed);
525+
const Uint32 MinimalPageCount = std::max((TotalPendingSize + m_PageSize - 1) / m_PageSize, 1u);
526+
527+
const Uint32 NumFreePages = static_cast<Uint32>(m_FreePages.Size());
528+
const Uint32 NumPagesToCreate = MinimalPageCount > NumFreePages ? MinimalPageCount - NumFreePages : 0;
529+
530+
if (NumPagesToCreate > 0)
422531
{
423-
std::lock_guard<std::mutex> Guard{m_FreePagesMtx};
424-
for (auto it = m_FreePages.begin(); it != m_FreePages.end(); ++it)
532+
m_NewFreePages.clear();
533+
for (Uint32 i = 0; i < NumPagesToCreate; ++i)
425534
{
426-
if ((*it)->GetSize() >= MaxPendingUpdateSize)
427-
{
428-
P = *it;
429-
m_FreePages.erase(it);
430-
break;
431-
}
535+
m_NewFreePages.push_back(CreatePage(pContext));
432536
}
537+
m_FreePages.Push(m_NewFreePages.data(), m_NewFreePages.size());
538+
m_NewFreePages.clear();
433539
}
540+
}
541+
542+
// Drains the pending-page queue: each dequeued page executes its pending operations on
// pContext with the current value of m_NextFenceValue and is then appended to the
// in-flight list. pContext must not be null.
void GPUUploadManagerImpl::ProcessPendingPages(IDeviceContext* pContext)
{
    VERIFY_EXPR(pContext != nullptr);

    for (Page* pPending = nullptr; m_PendingPages.Dequeue(pPending);)
    {
        pPending->ExecutePendingOps(pContext, m_NextFenceValue);
        m_InFlightPages.push_back(pPending);
    }
}
553+
554+
GPUUploadManagerImpl::Page* GPUUploadManagerImpl::AcquireFreePage(IDeviceContext* pContext)
555+
{
556+
Uint32 MaxPendingUpdateSize = m_MaxPendingUpdateSize.load(std::memory_order_relaxed);
434557

558+
Page* P = m_FreePages.Pop(MaxPendingUpdateSize);
435559
if (P == nullptr && pContext != nullptr)
436560
{
437561
P = CreatePage(pContext, MaxPendingUpdateSize);

Tests/DiligentCoreAPITest/src/GPUUploadManagerTest.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <atomic>
3535
#include <thread>
3636
#include <array>
37+
#include <cstring>
3738

3839
using namespace Diligent;
3940
using namespace Diligent::Testing;
@@ -53,4 +54,68 @@ TEST(GPUUploadManagerTest, Creation)
5354
ASSERT_TRUE(pUploadManager != nullptr);
5455
}
5556

57+
// Reads back the first ExpectedData.size() bytes of pBuffer and checks that they match
// ExpectedData.
//
// The data is copied into a temporary CPU-readable staging buffer, the context is
// idled so that the copy has completed, and the staging buffer is then mapped and
// compared byte by byte.
//
// \param [in] pBuffer      - Buffer to verify; must not be null.
// \param [in] ExpectedData - Bytes expected at the start of the buffer.
void VerifyBufferContents(IBuffer* pBuffer, const std::vector<Uint8>& ExpectedData)
{
    // Validate the input before it is dereferenced for its description.
    ASSERT_TRUE(pBuffer != nullptr);

    GPUTestingEnvironment* pEnv     = GPUTestingEnvironment::GetInstance();
    IRenderDevice*         pDevice  = pEnv->GetDevice();
    IDeviceContext*        pContext = pEnv->GetDeviceContext();

    BufferDesc Desc     = pBuffer->GetDesc();
    Desc.Name           = "GPUUploadManagerTest readback buffer";
    Desc.Usage          = USAGE_STAGING;
    Desc.CPUAccessFlags = CPU_ACCESS_READ;
    Desc.BindFlags      = BIND_NONE;

    // The readback buffer inherits the source size, so the copy and the comparison
    // below must not exceed it.
    ASSERT_LE(static_cast<Uint64>(ExpectedData.size()), static_cast<Uint64>(Desc.Size));

    RefCntAutoPtr<IBuffer> pReadbackBuffer;
    pDevice->CreateBuffer(Desc, nullptr, &pReadbackBuffer);
    // Check the buffer that was just created, not the input buffer.
    ASSERT_TRUE(pReadbackBuffer != nullptr);

    pContext->CopyBuffer(pBuffer, 0, RESOURCE_STATE_TRANSITION_MODE_TRANSITION,
                         pReadbackBuffer, 0, ExpectedData.size(), RESOURCE_STATE_TRANSITION_MODE_TRANSITION);
    // Make sure the copy has finished so that mapping with DO_NOT_WAIT is valid.
    pContext->WaitForIdle();

    void* pBufferData = nullptr;
    pContext->MapBuffer(pReadbackBuffer, MAP_READ, MAP_FLAG_DO_NOT_WAIT, pBufferData);
    ASSERT_NE(pBufferData, nullptr);

    EXPECT_TRUE(std::memcmp(pBufferData, ExpectedData.data(), ExpectedData.size()) == 0) << "Buffer contents do not match expected data";

    // Every successful MapBuffer must be paired with UnmapBuffer before the buffer is released.
    pContext->UnmapBuffer(pReadbackBuffer, MAP_READ);
}
83+
84+
// Schedules five consecutive updates that together cover a 4096-byte buffer, runs one
// render-thread update, and verifies the buffer contents through a readback.
// The update sizes range from 256 to 2048 bytes while the manager is created with 1024
// as its third create-info argument (presumably the page size - TODO confirm).
TEST(GPUUploadManagerTest, ScheduleUpdates)
{
    GPUTestingEnvironment* pTestEnv = GPUTestingEnvironment::GetInstance();
    IRenderDevice*         pDevice  = pTestEnv->GetDevice();
    IDeviceContext*        pContext = pTestEnv->GetDeviceContext();

    RefCntAutoPtr<IGPUUploadManager> pUploadManager;
    {
        GPUUploadManagerCreateInfo CreateInfo{pDevice, pContext, 1024};
        CreateGPUUploadManager(CreateInfo, &pUploadManager);
    }
    ASSERT_TRUE(pUploadManager != nullptr);

    // Reference pattern: byte i holds i modulo 256.
    std::vector<Uint8> BufferData(4096);
    {
        Uint8 NextValue = 0;
        for (Uint8& Byte : BufferData)
            Byte = NextValue++;
    }

    RefCntAutoPtr<IBuffer> pBuffer;
    {
        BufferDesc Desc;
        Desc.Name      = "GPUUploadManagerTest buffer";
        Desc.Size      = BufferData.size();
        Desc.Usage     = USAGE_DEFAULT;
        Desc.BindFlags = BIND_VERTEX_BUFFER;
        pDevice->CreateBuffer(Desc, nullptr, &pBuffer);
    }
    ASSERT_TRUE(pBuffer);

    // Back-to-back, non-overlapping ranges that cover the entire buffer.
    struct UpdateRange
    {
        Uint32 Offset;
        Uint32 NumBytes;
    };
    const UpdateRange Ranges[] = {
        {0, 256},
        {256, 256},
        {512, 1024},
        {1536, 512},
        {2048, 2048},
    };
    for (const UpdateRange& Range : Ranges)
    {
        pUploadManager->ScheduleBufferUpdate(pContext, pBuffer, Range.Offset, Range.NumBytes, &BufferData[Range.Offset]);
    }
    pUploadManager->RenderThreadUpdate(pContext);

    VerifyBufferContents(pBuffer, BufferData);
}
120+
56121
} // namespace

0 commit comments

Comments
 (0)