@@ -110,7 +110,7 @@ GPUUploadManagerImpl::Page::Page(IRenderDevice* pDevice,
110110 pDevice->CreateBuffer (Desc, nullptr , &m_pStagingBuffer);
111111 VERIFY_EXPR (m_pStagingBuffer != nullptr );
112112
113- pContext->MapBuffer (m_pStagingBuffer, MAP_WRITE, MAP_FLAG_NONE , m_pData);
113+ pContext->MapBuffer (m_pStagingBuffer, MAP_WRITE, MAP_FLAG_DO_NOT_WAIT , m_pData);
114114 VERIFY_EXPR (m_pData != nullptr );
115115}
116116
@@ -270,7 +270,7 @@ void GPUUploadManagerImpl::Page::Reset(IDeviceContext* pContext)
270270 {
271271 if (!m_PersistentMapped)
272272 {
273- pContext->MapBuffer (m_pStagingBuffer, MAP_WRITE, MAP_FLAG_NONE , m_pData);
273+ pContext->MapBuffer (m_pStagingBuffer, MAP_WRITE, MAP_FLAG_DO_NOT_WAIT , m_pData);
274274 }
275275 VERIFY_EXPR (m_pData != nullptr );
276276 }
@@ -296,6 +296,34 @@ void GPUUploadManagerImpl::Page::ReleaseStagingBuffer(IDeviceContext* pContext)
296296 m_pStagingBuffer.Release ();
297297}
298298
299+ void GPUUploadManagerImpl::FreePages::Push (Page** ppPages, size_t NumPages)
300+ {
301+ if (NumPages == 0 )
302+ return ;
303+
304+ std::lock_guard<std::mutex> Guard{m_PagesMtx};
305+ m_Pages.insert (m_Pages.end (), ppPages, ppPages + NumPages);
306+ m_Size.store (m_Pages.size (), std::memory_order_relaxed);
307+ }
308+
309+ GPUUploadManagerImpl::Page* GPUUploadManagerImpl::FreePages::Pop (Uint32 MinSize)
310+ {
311+ Page* P = nullptr ;
312+ {
313+ std::lock_guard<std::mutex> Guard{m_PagesMtx};
314+ for (auto it = m_Pages.begin (); it != m_Pages.end (); ++it)
315+ {
316+ if ((*it)->GetSize () >= MinSize)
317+ {
318+ P = *it;
319+ m_Pages.erase (it);
320+ m_Size.store (m_Pages.size (), std::memory_order_relaxed);
321+ break ;
322+ }
323+ }
324+ }
325+ return P;
326+ }
299327
300328GPUUploadManagerImpl::GPUUploadManagerImpl (IReferenceCounters* pRefCounters, const GPUUploadManagerCreateInfo& CI) :
301329 TBase{pRefCounters},
@@ -325,19 +353,66 @@ void GPUUploadManagerImpl::RenderThreadUpdate(IDeviceContext* pContext)
325353 DEV_CHECK_ERR (pContext == m_pContext, " The context passed to RenderThreadUpdate must be the same as the one used to create the GPUUploadManagerImpl" );
326354
327355 SealAndSwapCurrentPage (pContext);
328-
329356 ReclaimCompletedPages (pContext);
357+ UpdateFreePages (pContext);
358+ ProcessPendingPages (pContext);
330359
331- m_pFence-> Signal ( m_NextFenceValue++);
360+ pContext-> EnqueueSignal (m_pFence, m_NextFenceValue++);
332361}
333362
/// Schedules an asynchronous update of NumBytes of pDstBuffer at DstOffset with pSrcData.
///
/// Worker-thread entry point: the data is recorded into the current upload page;
/// the GPU copy itself is issued later on the render thread (the pending-page
/// processing path). Spins - rotating pages and yielding - until the update
/// can be placed into a page.
///
/// \param pContext      - Context forwarded to TryRotatePage when a new page is needed.
///                        NOTE(review): a comment in TryRotatePage claims workers pass
///                        null here so pages cannot be created on worker threads -
///                        confirm against the actual callers.
/// \param pDstBuffer    - Destination buffer to update.
/// \param DstOffset     - Destination offset, in bytes.
/// \param NumBytes      - Number of bytes to copy from pSrcData.
/// \param pSrcData      - Source data pointer.
/// \param Callback      - Callback passed through to the page's writer -
///                        presumably invoked when the update is enqueued; confirm in Page::Writer.
/// \param pCallbackData - User data passed to Callback.
void GPUUploadManagerImpl::ScheduleBufferUpdate(IDeviceContext* pContext,
                                                IBuffer*        pDstBuffer,
                                                Uint32          DstOffset,
                                                Uint32          NumBytes,
                                                const void*     pSrcData,
                                                GPUUploadEnqueuedCallbackType Callback,
                                                void*           pCallbackData)
{
    bool IsFirstAttempt = true;

    // Invoked when the current page cannot accept this update: account for the
    // pending bytes exactly once (so the render thread can size new pages), then
    // try to install a fresh current page, yielding if no free page is available.
    auto UpdatePendingSizeAndTryRotate = [&](Page* P) {
        if (IsFirstAttempt)
        {
            // Atomically update the max pending update size to ensure the next page is large enough
            AtomicMax(m_MaxPendingUpdateSize, NumBytes, std::memory_order_relaxed);
            m_TotalPendingUpdateSize.fetch_add(NumBytes, std::memory_order_relaxed);
            IsFirstAttempt = false;
        }
        if (!TryRotatePage(pContext, P))
        {
            std::this_thread::yield();
        }
    };

    while (true)
    {
        Page* P = m_pCurrentPage.load(std::memory_order_acquire);
        Page::Writer Writer = P->TryBeginWriting();
        if (!Writer)
        {
            // The page is sealed (no new writers admitted) - rotate and retry.
            UpdatePendingSizeAndTryRotate(P);
            continue;
        }

        const bool UpdateScheduled = Writer.ScheduleBufferUpdate(pDstBuffer, DstOffset, NumBytes, pSrcData, Callback, pCallbackData);
        if (Writer.EndWriting() == Page::WritingStatus::LastWriterSealed)
        {
            // We were the last writer
            TryEnqueuePage(P);
        }

        if (UpdateScheduled)
        {
            // Undo the pending-size contribution added on an earlier failed attempt;
            // the bytes are now placed in a page and no longer "pending".
            if (!IsFirstAttempt)
                m_TotalPendingUpdateSize.fetch_sub(NumBytes, std::memory_order_relaxed);
            break;
        }
        else
        {
            // Not enough space in this page - rotate to a fresh page and retry.
            UpdatePendingSizeAndTryRotate(P);
        }
    }
}
342417
343418GPUUploadManagerImpl::Page* GPUUploadManagerImpl::CreatePage (IDeviceContext* pContext, Uint32 MinSize)
@@ -374,11 +449,42 @@ bool GPUUploadManagerImpl::SealAndSwapCurrentPage(IDeviceContext* pContext)
374449 return true ;
375450}
376451
/// Attempts to replace the current page (expected to be ExpectedCurrent) with a
/// fresh free page.
///
/// \return true if a rotation happened - either performed by this thread or
///         observed to have been performed by a concurrent one - and false if
///         no free page could be acquired.
bool GPUUploadManagerImpl::TryRotatePage(IDeviceContext* pContext, Page* ExpectedCurrent)
{
    // Try to obtain a free page. AcquireFreePage only creates a new page when
    // pContext is non-null; with a null context it can merely reuse an existing
    // free page. NOTE(review): the original comment said "workers can't create,
    // so pContext=null", yet ScheduleBufferUpdate forwards its context here -
    // confirm which threads actually pass null.
    Page* Fresh = AcquireFreePage(pContext);
    if (!Fresh)
        return false;

    Page* Cur = ExpectedCurrent;
    if (!m_pCurrentPage.compare_exchange_strong(Cur, Fresh, std::memory_order_acq_rel))
    {
        // Lost the race: another thread already installed a new current page.
        // Return the unused fresh page to the free list.
        // NOTE(review): FreePages::Push visible in this file takes (Page**, size_t);
        // confirm a single-page overload exists.
        m_FreePages.Push(Fresh);
        return true; // Rotation happened by someone else
    }

    // We won the race: seal the page we displaced and, if no writers are still
    // active in it (SealStatus::Ready), hand it off for execution.
    if (ExpectedCurrent && ExpectedCurrent->TrySeal() == Page::SealStatus::Ready)
        TryEnqueuePage(ExpectedCurrent);

    return true;
}
473+
377474bool GPUUploadManagerImpl::TryEnqueuePage (Page* P)
378475{
476+ VERIFY_EXPR (P->DbgIsSealed ());
379477 if (P->TryEnqueue ())
380478 {
381- m_PendingPages.Enqueue (P);
479+ if (P->GetNumPendingOps () > 0 )
480+ {
481+ m_PendingPages.Enqueue (P);
482+ }
483+ else
484+ {
485+ P->Reset (nullptr );
486+ m_FreePages.Push (P);
487+ }
382488 return true ;
383489 }
384490 return false ;
@@ -407,31 +513,49 @@ void GPUUploadManagerImpl::ReclaimCompletedPages(IDeviceContext* pContext)
407513 m_InFlightPages.swap (m_TmpInFlightPages);
408514 m_TmpInFlightPages.clear ();
409515
410- {
411- std::lock_guard<std::mutex> Guard{m_FreePagesMtx};
412- m_FreePages.insert (m_FreePages.end (), m_NewFreePages.begin (), m_NewFreePages.end ());
413- }
516+ m_FreePages.Push (m_NewFreePages.data (), m_NewFreePages.size ());
414517 m_NewFreePages.clear ();
415518}
416519
417- GPUUploadManagerImpl::Page* GPUUploadManagerImpl::AcquireFreePage (IDeviceContext* pContext)
520+ void GPUUploadManagerImpl::UpdateFreePages (IDeviceContext* pContext)
418521{
419- Uint32 MaxPendingUpdateSize = m_MaxPendingUpdateSize. load (std::memory_order_relaxed );
522+ VERIFY_EXPR (pContext != nullptr );
420523
421- Page* P = nullptr ;
524+ const Uint32 TotalPendingSize = m_TotalPendingUpdateSize.exchange (0 , std::memory_order_relaxed);
525+ const Uint32 MinimalPageCount = std::max ((TotalPendingSize + m_PageSize - 1 ) / m_PageSize, 1u );
526+
527+ const Uint32 NumFreePages = static_cast <Uint32>(m_FreePages.Size ());
528+ const Uint32 NumPagesToCreate = MinimalPageCount > NumFreePages ? MinimalPageCount - NumFreePages : 0 ;
529+
530+ if (NumPagesToCreate > 0 )
422531 {
423- std::lock_guard<std::mutex> Guard{m_FreePagesMtx} ;
424- for (auto it = m_FreePages. begin (); it != m_FreePages. end () ; ++it )
532+ m_NewFreePages. clear () ;
533+ for (Uint32 i = 0 ; i < NumPagesToCreate ; ++i )
425534 {
426- if ((*it)->GetSize () >= MaxPendingUpdateSize)
427- {
428- P = *it;
429- m_FreePages.erase (it);
430- break ;
431- }
535+ m_NewFreePages.push_back (CreatePage (pContext));
432536 }
537+ m_FreePages.Push (m_NewFreePages.data (), m_NewFreePages.size ());
538+ m_NewFreePages.clear ();
433539 }
540+ }
541+
542+ void GPUUploadManagerImpl::ProcessPendingPages (IDeviceContext* pContext)
543+ {
544+ VERIFY_EXPR (pContext != nullptr );
545+
546+ Page* ReadyPage = nullptr ;
547+ while (m_PendingPages.Dequeue (ReadyPage))
548+ {
549+ ReadyPage->ExecutePendingOps (pContext, m_NextFenceValue);
550+ m_InFlightPages.push_back (ReadyPage);
551+ }
552+ }
553+
554+ GPUUploadManagerImpl::Page* GPUUploadManagerImpl::AcquireFreePage (IDeviceContext* pContext)
555+ {
556+ Uint32 MaxPendingUpdateSize = m_MaxPendingUpdateSize.load (std::memory_order_relaxed);
434557
558+ Page* P = m_FreePages.Pop (MaxPendingUpdateSize);
435559 if (P == nullptr && pContext != nullptr )
436560 {
437561 P = CreatePage (pContext, MaxPendingUpdateSize);
0 commit comments