diff --git a/cmake/server.cmake b/cmake/server.cmake index a76d1c1340..53242b30aa 100644 --- a/cmake/server.cmake +++ b/cmake/server.cmake @@ -22,6 +22,7 @@ set(TRACY_SERVER_SOURCES TracyTextureCompression.cpp TracyThreadCompress.cpp TracyWorker.cpp + TracyContext.cpp ) list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/") diff --git a/profiler/src/profiler/TracyTimelineItemGpu.cpp b/profiler/src/profiler/TracyTimelineItemGpu.cpp index b499c64f17..e462913e07 100644 --- a/profiler/src/profiler/TracyTimelineItemGpu.cpp +++ b/profiler/src/profiler/TracyTimelineItemGpu.cpp @@ -31,7 +31,7 @@ const char* TimelineItemGpu::HeaderLabel() const } else { - sprintf( buf, "%s context %i", GpuContextNames[(int)m_gpu->type], m_idx ); + sprintf( buf, "%s context %i", ZoneContextNames[(int)m_gpu->type], m_idx ); } return buf; } @@ -40,13 +40,13 @@ void TimelineItemGpu::HeaderTooltip( const char* label ) const { const bool dynamicColors = m_view.GetViewData().dynamicColors; const bool isMultithreaded = - ( m_gpu->type == GpuContextType::Vulkan ) || - ( m_gpu->type == GpuContextType::OpenCL ) || - ( m_gpu->type == GpuContextType::Direct3D12 ) || - ( m_gpu->type == GpuContextType::Metal ); + ( m_gpu->type == ZoneContextType::Vulkan ) || + ( m_gpu->type == ZoneContextType::OpenCL ) || + ( m_gpu->type == ZoneContextType::Direct3D12 ) || + ( m_gpu->type == ZoneContextType::Metal ); char buf[64]; - sprintf( buf, "%s context %i", GpuContextNames[(int)m_gpu->type], m_idx ); + sprintf( buf, "%s context %i", ZoneContextNames[(int)m_gpu->type], m_idx ); ImGui::BeginTooltip(); if( m_gpu->name.Active() ) TextFocused( "Name:", m_worker.GetString( m_gpu->name ) ); @@ -66,18 +66,7 @@ void TimelineItemGpu::HeaderTooltip( const char* label ) const auto tid = it->first; if( tid == 0 ) { - if( !it->second.timeline.empty() ) - { - if( it->second.timeline.is_magic() ) - { - auto& tl = *(Vector*)&it->second.timeline; - tid = m_worker.DecompressThread( tl.begin()->Thread() ); - } - else - { - tid = m_worker.DecompressThread( (*it->second.timeline.begin())->Thread() ); - } - } + tid = it->second->id; } SmallColorBox( GetThreadColor( tid, 0, dynamicColors ) ); ImGui::SameLine(); @@ -134,7 +123,7 @@ void TimelineItemGpu::HeaderExtraContents( const TimelineContext& ctx, int offse const auto ty = ImGui::GetTextLineHeight(); char buf[64]; - sprintf( buf, "%s context %i", GpuContextNames[(int)m_gpu->type], m_idx ); + sprintf( buf, "%s context %i", ZoneContextNames[(int)m_gpu->type], m_idx ); draw->AddText( ctx.wpos + ImVec2( ty * 1.5f + labelWidth, offset ), HeaderColorInactive(), buf ); } } @@ -145,13 +134,13 @@ int64_t TimelineItemGpu::RangeBegin() const for( auto& td : m_gpu->threadData ) { int64_t t0; - if( td.second.timeline.is_magic() ) + if( td.second->timeline.is_magic() ) { - t0 = ((Vector*)&td.second.timeline)->front().GpuStart(); + t0 = ((Vector*)&td.second->timeline)->front().Start(); } else { - t0 = td.second.timeline.front()->GpuStart(); + t0 = td.second->timeline.front()->Start(); } if( t0 >= 0 ) { @@ -167,23 +156,23 @@ int64_t TimelineItemGpu::RangeEnd() const for( auto& td : m_gpu->threadData ) { int64_t t0; - if( td.second.timeline.is_magic() ) + if( td.second->timeline.is_magic() ) { - t0 = ((Vector*)&td.second.timeline)->front().GpuStart(); + t0 = ((Vector*)&td.second->timeline)->front().Start(); } else { - t0 = td.second.timeline.front()->GpuStart(); + t0 = td.second->timeline.front()->Start(); } if( t0 >= 0 ) { - if( td.second.timeline.is_magic() ) + if( td.second->timeline.is_magic() ) { - t = std::max( t, std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( ((Vector*)&td.second.timeline)->back() ) ) ); + t = std::max( t, std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( ((Vector*)&td.second->timeline)->back() ) ) ); } else { - t = std::max( t, std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( *td.second.timeline.back() ) ) ); + t = std::max( t, std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( *td.second->timeline.back() ) ) ); } } } diff --git a/profiler/src/profiler/TracyTimelineItemGpu.hpp b/profiler/src/profiler/TracyTimelineItemGpu.hpp index 5bb645b034..c6259e87fd 100644 --- a/profiler/src/profiler/TracyTimelineItemGpu.hpp +++ b/profiler/src/profiler/TracyTimelineItemGpu.hpp @@ -2,6 +2,7 @@ #define __TRACYTIMELINEITEMGPU_HPP__ #include "TracyEvent.hpp" +#include "TracyContext.hpp" #include "TracyTimelineItem.hpp" namespace tracy diff --git a/profiler/src/profiler/TracyTimelineItemThread.cpp b/profiler/src/profiler/TracyTimelineItemThread.cpp index 04f6e3d8d5..a30816f3ab 100644 --- a/profiler/src/profiler/TracyTimelineItemThread.cpp +++ b/profiler/src/profiler/TracyTimelineItemThread.cpp @@ -34,9 +34,10 @@ bool TimelineItemThread::IsEmpty() const { auto& crash = m_worker.GetCrashEvent(); return crash.thread != m_thread->id && - m_thread->timeline.empty() && - m_thread->messages.empty() && - m_thread->ghostZones.empty(); + m_thread->timeline.empty() && + m_thread->messages.empty() && + ( m_thread->ctx->type != ZoneContextType::CPU || + static_cast( m_thread )->ghostZones.empty() ); } uint32_t TimelineItemThread::HeaderColor() const @@ -220,14 +221,18 @@ void TimelineItemThread::HeaderTooltip( const char* label ) const { TextFocused( "Running state regions:", RealToString( ctx->v.size() ) ); } - if( !m_thread->samples.empty() ) + if( m_thread->ctx->type != ZoneContextType::CPU ) { - TextFocused( "Call stack samples:", RealToString( m_thread->samples.size() ) ); - if( m_thread->kernelSampleCnt != 0 ) + auto cpu_thread = static_cast( m_thread ); + if( !cpu_thread->samples.empty() ) { - TextFocused( "Kernel samples:", RealToString( m_thread->kernelSampleCnt ) ); - ImGui::SameLine(); - ImGui::TextDisabled( "(%.2f%%)", 100.f * m_thread->kernelSampleCnt / m_thread->samples.size() ); + TextFocused( "Call stack samples:", RealToString( cpu_thread->samples.size() ) ); + if( cpu_thread->kernelSampleCnt != 0 ) + { + TextFocused( "Kernel samples:", RealToString( cpu_thread->kernelSampleCnt ) ); + ImGui::SameLine(); + ImGui::TextDisabled( "(%.2f%%)", 100.f * cpu_thread->kernelSampleCnt / cpu_thread->samples.size() ); + } } } ImGui::EndTooltip(); @@ -238,21 +243,25 @@ void TimelineItemThread::HeaderExtraContents( const TimelineContext& ctx, int of m_view.DrawThreadMessagesList( ctx, m_msgDraw, offset, m_thread->id ); #ifndef TRACY_NO_STATISTICS - const bool hasGhostZones = m_worker.AreGhostZonesReady() && !m_thread->ghostZones.empty(); - if( hasGhostZones && !m_thread->timeline.empty() ) + if( m_thread->ctx->type == ZoneContextType::CPU ) { - auto draw = ImGui::GetWindowDrawList(); - const auto ty = ImGui::GetTextLineHeight(); + auto cpu_thread = static_cast( m_thread ); + const bool hasGhostZones = m_worker.AreGhostZonesReady() && !cpu_thread->ghostZones.empty(); + if( hasGhostZones && !m_thread->timeline.empty() ) + { + auto draw = ImGui::GetWindowDrawList(); + const auto ty = ImGui::GetTextLineHeight(); - const auto color = m_ghost ? 0xFFAA9999 : 0x88AA7777; - draw->AddText( ctx.wpos + ImVec2( 1.5f * ty + labelWidth, offset ), color, ICON_FA_GHOST ); - float ghostSz = ImGui::CalcTextSize( ICON_FA_GHOST ).x; + const auto color = m_ghost ? 0xFFAA9999 : 0x88AA7777; + draw->AddText( ctx.wpos + ImVec2( 1.5f * ty + labelWidth, offset ), color, ICON_FA_GHOST ); + float ghostSz = ImGui::CalcTextSize( ICON_FA_GHOST ).x; - if( ctx.hover && ImGui::IsMouseHoveringRect( ctx.wpos + ImVec2( 1.5f * ty + labelWidth, offset ), ctx.wpos + ImVec2( 1.5f * ty + labelWidth + ghostSz, offset + ty ) ) ) - { - if( IsMouseClicked( 0 ) ) + if( ctx.hover && ImGui::IsMouseHoveringRect( ctx.wpos + ImVec2( 1.5f * ty + labelWidth, offset ), ctx.wpos + ImVec2( 1.5f * ty + labelWidth + ghostSz, offset + ty ) ) ) { - m_ghost = !m_ghost; + if( IsMouseClicked( 0 ) ) + { + m_ghost = !m_ghost; + } } } } @@ -309,9 +318,10 @@ void TimelineItemThread::Preprocess( const TimelineContext& ctx, TaskDispatch& t td.Queue( [this, &ctx, visible] { #ifndef TRACY_NO_STATISTICS - if( m_worker.AreGhostZonesReady() && ( m_ghost || ( m_view.GetViewData().ghostZones && m_thread->timeline.empty() ) ) ) + if( m_thread->ctx->type == ZoneContextType::CPU && m_worker.AreGhostZonesReady() && ( m_ghost || ( m_view.GetViewData().ghostZones && m_thread->timeline.empty() ) ) ) { - m_depth = PreprocessGhostLevel( ctx, m_thread->ghostZones, 0, visible ); + auto cpu_thread = static_cast( m_thread ); + m_depth = PreprocessGhostLevel( ctx, cpu_thread->ghostZones, 0, visible ); } else #endif @@ -336,12 +346,16 @@ void TimelineItemThread::Preprocess( const TimelineContext& ctx, TaskDispatch& t } } - m_hasSamples = false; - if( vd.drawSamples && !m_thread->samples.empty() ) + if( m_thread->ctx->type == ZoneContextType::CPU ) { - td.Queue( [this, &ctx, visible, yPos] { - PreprocessSamples( ctx, m_thread->samples, visible, yPos ); - } ); + auto cpu_thread = static_cast( m_thread ); + m_hasSamples = false; + if( vd.drawSamples && !cpu_thread->samples.empty() ) + { + td.Queue( [this, &ctx, visible, yPos, cpu_thread] { + PreprocessSamples( ctx, cpu_thread->samples, visible, yPos ); + } ); + } } m_hasMessages = false; @@ -525,7 +539,6 @@ void TimelineItemThread::PreprocessContextSwitches( const TimelineContext& ctx, if( !visible ) return; const auto MinCtxNs = int64_t( round( GetScale() * MinCtxSize * nspx ) ); - const auto& sampleData = m_thread->samples; bool first = true; while( it < citend ) @@ -535,8 +548,9 @@ void TimelineItemThread::PreprocessContextSwitches( const TimelineContext& ctx, { first = false; } - else + else if ( m_thread->ctx->type == ZoneContextType::CPU ) { + const Vector& sampleData = static_cast( m_thread )->samples; uint32_t waitStack = 0; if( !sampleData.empty() ) { diff --git a/profiler/src/profiler/TracyTimelineItemThread.hpp b/profiler/src/profiler/TracyTimelineItemThread.hpp index 9e856d425c..01703d6194 100644 --- a/profiler/src/profiler/TracyTimelineItemThread.hpp +++ b/profiler/src/profiler/TracyTimelineItemThread.hpp @@ -2,6 +2,7 @@ #define __TRACYTIMELINEITEMTHREAD_HPP__ #include "TracyEvent.hpp" +#include "TracyContext.hpp" #include "TracyTimelineDraw.hpp" #include "TracyTimelineItem.hpp" diff --git a/profiler/src/profiler/TracyUtility.hpp b/profiler/src/profiler/TracyUtility.hpp index c9a2e75dce..6fc69047d7 100644 --- a/profiler/src/profiler/TracyUtility.hpp +++ b/profiler/src/profiler/TracyUtility.hpp @@ -7,6 +7,7 @@ #include "imgui.h" #include "../server/TracyEvent.hpp" +#include "../server/TracyContext.hpp" namespace tracy { diff --git a/profiler/src/profiler/TracyView.cpp b/profiler/src/profiler/TracyView.cpp index 87c658a69e..150ee33cf2 100644 --- a/profiler/src/profiler/TracyView.cpp +++ b/profiler/src/profiler/TracyView.cpp @@ -1140,7 +1140,6 @@ bool View::DrawImpl() ImGui::End(); m_zoneHighlight = nullptr; - m_gpuHighlight = nullptr; DrawInfoWindow(); diff --git a/profiler/src/profiler/TracyView.hpp b/profiler/src/profiler/TracyView.hpp index 23f308eded..a6aba5929c 100644 --- a/profiler/src/profiler/TracyView.hpp +++ b/profiler/src/profiler/TracyView.hpp @@ -37,19 +37,6 @@ namespace tracy { -constexpr const char* GpuContextNames[] = { - "Invalid", - "OpenGL", - "Vulkan", - "OpenCL", - "Direct3D 12", - "Direct3D 11", - "Metal", - "Custom", - "CUDA", - "Rocprof" -}; - struct MemoryPage; class FileRead; class SourceView; @@ -177,6 +164,8 @@ class View Range m_flameRange; Range m_waitStackRange; + uint64_t GetZoneThread( const ZoneEvent& zone ) const; + private: enum class ShortcutAction : uint8_t { @@ -252,14 +241,14 @@ class View void DrawTimelineFrames( const FrameData& frames ); void DrawTimeline(); void DrawSampleList( const TimelineContext& ctx, const std::vector& drawList, const Vector& vec, int offset ); - void DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int offset, uint64_t tid, int maxDepth, double margin ); + void DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int offset, const ThreadData& thread, int maxDepth, double margin ); void DrawThreadCropper( const int depth, const uint64_t tid, const float xPos, const float yPos, const float ostep, const float cropperWidth, const bool hasCtxSwitches ); void DrawContextSwitchList( const TimelineContext& ctx, const std::vector& drawList, const Vector& ctxSwitch, int offset, int endOffset, bool isFiber ); - int DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); + int DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, const ThreadData& thread, float yMin, float yMax, int64_t begin, int drift ); template - int DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); + int DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, const ThreadData& thread, float yMin, float yMax, int64_t begin, int drift ); template - int SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); + int SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, const ThreadData& thread, float yMin, float yMax, int64_t begin, int drift ); void DrawLockHeader( uint32_t id, const LockMap& lockmap, const SourceLocation& srcloc, bool hover, ImDrawList* draw, const ImVec2& wpos, float w, float ty, float offset, uint8_t tid ); int DrawLocks( const TimelineContext& ctx, const std::vector>& lockDraw, uint64_t tid, int _offset, LockHighlight& highlight ); void DrawPlotPoint( const ImVec2& wpos, float x, float y, int offset, uint32_t color, bool hover, bool hasPrev, const PlotItem& item, double prev, PlotType type, PlotValueFormatting format, float PlotHeight, uint64_t name ); @@ -318,7 +307,7 @@ class View void DrawGpuInfoWindow(); template - void DrawZoneInfoChildren( const V& children, int64_t ztime ); + void DrawZoneInfoChildren( const V& children, int64_t ztime, const ThreadData& thread ); template void DrawGpuInfoChildren( const V& children, int64_t ztime ); @@ -333,21 +322,16 @@ class View uint32_t GetSrcLocColor( const SourceLocation& srcloc, int depth ); uint32_t GetRawSrcLocColor( const SourceLocation& srcloc, int depth ); uint32_t GetZoneColor( const ZoneEvent& ev, uint64_t thread, int depth ); - uint32_t GetZoneColor( const GpuEvent& ev ); ZoneColorData GetZoneColorData( const ZoneEvent& ev, uint64_t thread, int depth, uint32_t inheritedColor ); - ZoneColorData GetZoneColorData( const GpuEvent& ev ); void ZoomToZone( const ZoneEvent& ev ); - void ZoomToZone( const GpuEvent& ev ); void ZoomToPrevFrame(); void ZoomToNextFrame(); void CenterAtTime( int64_t t ); void ShowZoneInfo( const ZoneEvent& ev ); - void ShowZoneInfo( const GpuEvent& ev, uint64_t thread ); - void ZoneTooltip( const ZoneEvent& ev ); - void ZoneTooltip( const GpuEvent& ev ); + void ZoneTooltip( const ZoneEvent& ev, const ThreadData& thread ); void CallstackTooltip( uint32_t idx ); void CallstackTooltipContents( uint32_t idx ); void CrashTooltip(); @@ -356,14 +340,11 @@ class View const ZoneEvent* GetZoneParent( const ZoneEvent& zone, uint64_t tid ) const; const ZoneEvent* GetZoneChild( const ZoneEvent& zone, int64_t time ) const; bool IsZoneReentry( const ZoneEvent& zone ) const; - bool IsZoneReentry( const ZoneEvent& zone, uint64_t tid ) const; - const GpuEvent* GetZoneParent( const GpuEvent& zone ) const; + bool IsZoneReentry( const ZoneEvent& zone, uint64_t tid, const ZoneContext* ctx ) const; const ThreadData* GetZoneThreadData( const ZoneEvent& zone ) const; - uint64_t GetZoneThread( const ZoneEvent& zone ) const; - uint64_t GetZoneThread( const GpuEvent& zone ) const; - const GpuCtxData* GetZoneCtx( const GpuEvent& zone ) const; + const ZoneContext* GetZoneCtx( const ZoneEvent& zone ) const; bool FindMatchingZone( int prev0, int prev1, int flags ); - const ZoneEvent* FindZoneAtTime( uint64_t thread, int64_t time ) const; + const pair FindZoneAtTime( uint64_t thread, int64_t time ) const; uint64_t GetFrameNumber( const FrameData& fd, int i ) const; const char* GetFrameText( const FrameData& fd, int i, uint64_t ftime ) const; const char* GetFrameSetName( const FrameData& fd ) const; @@ -380,11 +361,9 @@ class View void DrawCallstackCalls( uint32_t callstack, uint16_t limit ) const; void SetViewToLastFrames(); int64_t GetZoneChildTime( const ZoneEvent& zone ); - int64_t GetZoneChildTime( const GpuEvent& zone ); int64_t GetZoneChildTimeFast( const ZoneEvent& zone ); int64_t GetZoneChildTimeFastClamped( const ZoneEvent& zone, int64_t t0, int64_t t1 ); int64_t GetZoneSelfTime( const ZoneEvent& zone ); - int64_t GetZoneSelfTime( const GpuEvent& zone ); bool GetZoneRunningTime( const ContextSwitch* ctx, const ZoneEvent& ev, int64_t& time, uint64_t& cnt ); bool GetZoneRunningTime( const ContextSwitch* ctx, const ZoneEvent& ev, const RangeSlim& range, int64_t& time, uint64_t& cnt ); const char* GetThreadContextData( uint64_t thread, bool& local, bool& untracked, const char*& program ); @@ -401,11 +380,12 @@ class View void Attention( bool& alreadyDone ); void UpdateTitle(); + void ContextCombo( const std::string* ctxName, uint8_t* ctxIdx ); unordered_flat_map m_threadDepthLimit; unordered_flat_map m_visibleMsgThread; unordered_flat_map m_waitStackThread; - unordered_flat_map m_flameGraphThread; + unordered_flat_map> m_flameGraphThread; unordered_flat_map m_gpuDrift; unordered_flat_map m_plotView; Vector m_threadOrder; @@ -432,14 +412,19 @@ class View return it->second; } - tracy_force_inline bool& FlameGraphThread( uint64_t thread ) + tracy_force_inline bool& FlameGraphThread( ZoneContext* ctx, uint64_t thread ) { - auto it = m_flameGraphThread.find( thread ); + auto it = m_flameGraphThread.find( ctx ); if( it == m_flameGraphThread.end() ) { - it = m_flameGraphThread.emplace( thread, true ).first; + it = m_flameGraphThread.emplace( ctx, decltype( m_flameGraphThread )::mapped_type() ).first; } - return it->second; + auto it2 = it->second.find( thread ); + if( it2 == it->second.end() ) + { + it2 = it->second.emplace( thread, true ).first; + } + return it2->second; } tracy_force_inline int& GpuDrift( const void* ptr ) @@ -479,9 +464,6 @@ class View DecayValue m_msgHighlight = nullptr; DecayValue m_lockHoverHighlight = InvalidId; DecayValue m_msgToFocus = nullptr; - const GpuEvent* m_gpuInfoWindow = nullptr; - const GpuEvent* m_gpuHighlight; - uint64_t m_gpuInfoWindowThread; uint32_t m_callstackInfoWindow = 0; int64_t m_memoryAllocInfoWindow = -1; uint64_t m_memoryAllocInfoPool = 0; @@ -511,7 +493,7 @@ class View ImGuiTextFilter m_statisticsFilter; ImGuiTextFilter m_statisticsImageFilter; ImGuiTextFilter m_userTextFilter; - unordered_flat_set m_filteredZones; + unordered_flat_set m_filteredZones; Region m_highlight; Region m_highlightZoom; @@ -530,8 +512,12 @@ class View bool m_showAnnotationList = false; bool m_showWaitStacks = false; bool m_showFlameGraph = false; + std::string* m_flameCtxName; + uint8_t m_flameCtx = 0; AccumulationMode m_statAccumulationMode = AccumulationMode::SelfOnly; + std::string* m_statCtxName; + uint8_t m_statCtx = 0; bool m_statSampleTime = true; int m_statMode = 0; bool m_shortImageNames = true; @@ -575,7 +561,6 @@ class View BuzzAnim m_statBuzzAnim; Vector m_zoneInfoStack; - Vector m_gpuInfoStack; SourceContents m_srcHintCache; std::unique_ptr m_sourceView; @@ -782,7 +767,7 @@ class View } } m_findZone; - tracy_force_inline uint64_t GetSelectionTarget( const Worker::ZoneThreadData& ev, FindZone::GroupBy groupBy ) const; + tracy_force_inline uint64_t GetSelectionTarget( const ZoneContext::ZoneThreadData& ev, FindZone::GroupBy groupBy ) const; struct CompVal { @@ -877,8 +862,6 @@ class View struct { std::pair zoneSelfTime = { nullptr, 0 }; std::pair zoneSelfTime2 = { nullptr, 0 }; - std::pair gpuSelfTime = { nullptr, 0 }; - std::pair gpuSelfTime2 = { nullptr, 0 }; } m_cache; struct { diff --git a/profiler/src/profiler/TracyView_Compare.cpp b/profiler/src/profiler/TracyView_Compare.cpp index 4c09e8d3be..167edbe394 100644 --- a/profiler/src/profiler/TracyView_Compare.cpp +++ b/profiler/src/profiler/TracyView_Compare.cpp @@ -25,7 +25,8 @@ extern double s_time; auto it = m_compare.match[0].begin(); while( it != m_compare.match[0].end() ) { - if( m_worker.GetZonesForSourceLocation( *it ).zones.empty() ) + auto p = m_worker.GetZonesForSourceLocation( *it ); + if( p.second == nullptr || p.first.zones.empty() ) { it = m_compare.match[0].erase( it ); } @@ -42,7 +43,8 @@ extern double s_time; auto it = m_compare.match[1].begin(); while( it != m_compare.match[1].end() ) { - if( m_compare.second->GetZonesForSourceLocation( *it ).zones.empty() ) + auto p = m_compare.second->GetZonesForSourceLocation( *it ); + if( p.second == nullptr || p.first.zones.empty() ) { it = m_compare.match[1].erase( it ); } @@ -533,8 +535,9 @@ void View::DrawCompare() int idx = 0; for( auto& v : m_compare.match[0] ) { + auto p = m_worker.GetZonesForSourceLocation( v ); auto& srcloc = m_worker.GetSourceLocation( v ); - auto& zones = m_worker.GetZonesForSourceLocation( v ).zones; + auto& zones = p.first.zones; SmallColorBox( GetSrcLocColor( srcloc, 0 ) ); ImGui::SameLine(); ImGui::PushID( idx ); @@ -542,6 +545,8 @@ void View::DrawCompare() ImGui::RadioButton( m_worker.GetString( srcloc.name.active ? srcloc.name : srcloc.function ), &m_compare.selMatch[0], idx++ ); ImGui::PopStyleVar(); ImGui::SameLine(); + ImGui::TextColored( ImVec4( 1.0, 1.0, 0.0, 1 ), "[%s]", m_worker.GetCtxName( p.second ).c_str() ); + ImGui::SameLine(); ImGui::TextColored( ImVec4( 0.5, 0.5, 0.5, 1 ), "(%s) %s", RealToString( zones.size() ), LocationToString( m_worker.GetString( srcloc.file ), srcloc.line ) ); ImGui::PopID(); } @@ -551,13 +556,16 @@ void View::DrawCompare() idx = 0; for( auto& v : m_compare.match[1] ) { + auto p = m_compare.second->GetZonesForSourceLocation( v ); auto& srcloc = m_compare.second->GetSourceLocation( v ); - auto& zones = m_compare.second->GetZonesForSourceLocation( v ).zones; + auto& zones = p.first.zones; ImGui::PushID( -1 - idx ); ImGui::PushStyleVar( ImGuiStyleVar_FramePadding, ImVec2( 0, 0 ) ); ImGui::RadioButton( m_compare.second->GetString( srcloc.name.active ? srcloc.name : srcloc.function ), &m_compare.selMatch[1], idx++ ); ImGui::PopStyleVar(); ImGui::SameLine(); + ImGui::TextColored( ImVec4( 1.0, 0.0, 0.0, 1 ), "[%s]", m_compare.second->GetCtxName( p.second ).c_str() ); + ImGui::SameLine(); ImGui::TextColored( ImVec4( 0.5, 0.5, 0.5, 1 ), "(%s) %s", RealToString( zones.size() ), LocationToString( m_compare.second->GetString( srcloc.file ), srcloc.line ) ); ImGui::PopID(); } @@ -705,8 +713,8 @@ void View::DrawCompare() if( m_compare.compareMode == 0 ) { - auto& zoneData0 = m_worker.GetZonesForSourceLocation( m_compare.match[0][m_compare.selMatch[0]] ); - auto& zoneData1 = m_compare.second->GetZonesForSourceLocation( m_compare.match[1][m_compare.selMatch[1]] ); + auto& zoneData0 = m_worker.GetZonesForSourceLocation( m_compare.match[0][m_compare.selMatch[0]] ).first; + auto& zoneData1 = m_compare.second->GetZonesForSourceLocation( m_compare.match[1][m_compare.selMatch[1]] ).first; auto& zones0 = zoneData0.zones; auto& zones1 = zoneData1.zones; zones0.ensure_sorted(); diff --git a/profiler/src/profiler/TracyView_ContextSwitch.cpp b/profiler/src/profiler/TracyView_ContextSwitch.cpp index 0be3f9e535..62ff70497c 100644 --- a/profiler/src/profiler/TracyView_ContextSwitch.cpp +++ b/profiler/src/profiler/TracyView_ContextSwitch.cpp @@ -69,7 +69,7 @@ const char* View::DecodeContextSwitchReason( uint8_t reason ) { case ContextSwitchData::Win32_Executive: return "(Thread is waiting for the scheduler)"; case ContextSwitchData::Win32_FreePage: return "(Thread is waiting for a free virtual memory page)"; - case ContextSwitchData::Win32_PageIn: return "(Thread is waiting for a virtual memory page to arrive in memory)"; + case ContextSwitchData::Win32_PageIn: return "(Thread is waiting for a virtual memory page to arrive in memory)"; case ContextSwitchData::Win32_PoolAllocation: return "(Thread is waiting for a system allocation)"; case ContextSwitchData::Win32_DelayExecution: return "(Thread execution is delayed)"; case ContextSwitchData::Win32_Suspended: return "(Thread execution is suspended)"; @@ -399,6 +399,7 @@ void View::DrawWaitStacks() { if( WaitStackThread( t->id ) ) { + const CPUThreadData* t = static_cast(t); auto it = t->ctxSwitchSamples.begin(); auto end = t->ctxSwitchSamples.end(); if( m_waitStackRange.active ) @@ -507,8 +508,9 @@ void View::DrawWaitStacks() } int idx = 0; - for( const auto& t : m_threadOrder ) + for( const auto& td : m_threadOrder ) { + const CPUThreadData* t = static_cast(td); if( t->ctxSwitchSamples.empty() ) continue; ImGui::PushID( idx++ ); const auto threadColor = GetThreadColor( t->id, 0 ); diff --git a/profiler/src/profiler/TracyView_CpuData.cpp b/profiler/src/profiler/TracyView_CpuData.cpp index 42a3a5e1f6..60a9e0e9e4 100644 --- a/profiler/src/profiler/TracyView_CpuData.cpp +++ b/profiler/src/profiler/TracyView_CpuData.cpp @@ -389,7 +389,7 @@ bool View::DrawCpuData( const TimelineContext& ctx, const std::vector>& zones ) { ZoomToZone( *ev ); } - ZoneTooltip( *ev ); + ZoneTooltip( *ev, *GetZoneThreadData( *ev ) ); m_zoneHover2 = ev; } @@ -378,8 +379,9 @@ void View::DrawFindZone() int idx = 0; for( auto& v : m_findZone.match ) { + auto p = m_worker.GetZonesForSourceLocation( v ); auto& srcloc = m_worker.GetSourceLocation( v ); - auto& zones = m_worker.GetZonesForSourceLocation( v ).zones; + auto& zones = p.first.zones; SmallColorBox( GetSrcLocColor( srcloc, 0 ) ); ImGui::SameLine(); ImGui::PushID( idx ); @@ -396,6 +398,8 @@ void View::DrawFindZone() { ImGui::SameLine(); } + ImGui::TextColored( ImVec4( 1.0, 1.0, 0.0, 1 ), "[%s]", m_worker.GetCtxName( p.second ).c_str() ); + ImGui::SameLine(); const auto fileName = m_worker.GetString( srcloc.file ); ImGui::TextColored( ImVec4( 0.5, 0.5, 0.5, 1 ), "(%s) %s", RealToString( zones.size() ), LocationToString( fileName, srcloc.line ) ); if( ImGui::IsItemHovered() ) @@ -430,7 +434,7 @@ void View::DrawFindZone() ImGui::Separator(); - auto& zoneData = m_worker.GetZonesForSourceLocation( m_findZone.match[m_findZone.selMatch] ); + auto& zoneData = m_worker.GetZonesForSourceLocation( m_findZone.match[m_findZone.selMatch] ).first; auto& zones = zoneData.zones; zones.ensure_sorted(); if( ImGui::TreeNodeEx( "Histogram", ImGuiTreeNodeFlags_DefaultOpen ) ) diff --git a/profiler/src/profiler/TracyView_FlameGraph.cpp b/profiler/src/profiler/TracyView_FlameGraph.cpp index 63530c0fb2..8918aae526 100644 --- a/profiler/src/profiler/TracyView_FlameGraph.cpp +++ b/profiler/src/profiler/TracyView_FlameGraph.cpp @@ -735,6 +735,11 @@ void View::DrawFlameGraph() ImGui::Begin( "Flame graph", &m_showFlameGraph, ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse ); if( ImGui::GetCurrentWindowRead()->SkipItems ) { ImGui::End(); return; } + ContextCombo( m_flameCtxName, &m_flameCtx ); + auto ctx = m_worker.GetCtxData()[m_flameCtx]; + + ImGui::SameLine(); + ImGui::PushStyleVar( ImGuiStyleVar_FramePadding, ImVec2( 2, 2 ) ); if( ImGui::RadioButton( ICON_FA_SYRINGE " Instrumentation", &m_flameMode, 0 ) ) m_flameGraphInvariant.Reset(); @@ -798,14 +803,14 @@ void View::DrawFlameGraph() ToggleButton( ICON_FA_RULER " Limits", m_showRanges ); } - auto& td = m_worker.GetThreadData(); + auto& td = ctx->threads; auto expand = ImGui::TreeNode( ICON_FA_SHUFFLE " Visible threads:" ); ImGui::SameLine(); size_t visibleThreads = 0; size_t tsz = 0; for( const auto& t : td ) { - if( FlameGraphThread( t->id ) ) visibleThreads++; + if( FlameGraphThread( ctx, t->id ) ) visibleThreads++; tsz++; } if( visibleThreads == tsz ) @@ -823,7 +828,7 @@ void View::DrawFlameGraph() { for( const auto& t : td ) { - FlameGraphThread( t->id ) = true; + FlameGraphThread( ctx, t->id ) = true; } m_flameGraphInvariant.Reset(); } @@ -832,7 +837,7 @@ void View::DrawFlameGraph() { for( const auto& t : td ) { - FlameGraphThread( t->id ) = false; + FlameGraphThread( ctx, t->id ) = false; } m_flameGraphInvariant.Reset(); } @@ -844,7 +849,7 @@ void View::DrawFlameGraph() const auto threadColor = GetThreadColor( t->id, 0 ); SmallColorBox( threadColor ); ImGui::SameLine(); - if( SmallCheckbox( m_worker.GetThreadName( t->id ), &FlameGraphThread( t->id ) ) ) m_flameGraphInvariant.Reset(); + if( SmallCheckbox( m_worker.GetThreadName( t->id ), &FlameGraphThread( ctx, t->id ) ) ) m_flameGraphInvariant.Reset(); ImGui::PopID(); if( t->isFiber ) { @@ -865,7 +870,7 @@ void View::DrawFlameGraph() m_flameGraphInvariant.range = m_flameRange; size_t sz = 0; - for( auto& thread : td ) if( FlameGraphThread( thread->id ) ) sz++; + for( auto& thread : td ) if( FlameGraphThread( ctx, thread->id ) ) sz++; std::vector> threadData; threadData.resize( sz ); @@ -875,7 +880,7 @@ void View::DrawFlameGraph() { for( auto& thread : td ) { - if( FlameGraphThread( thread->id ) ) + if( FlameGraphThread( ctx, thread->id ) ) { if( m_flameRunningTime ) { @@ -904,10 +909,14 @@ void View::DrawFlameGraph() { for( auto& thread : td ) { - if( FlameGraphThread( thread->id ) ) + if( FlameGraphThread( ctx, thread->id ) ) { m_td.Queue( [this, idx, thread, &threadData] { - BuildFlameGraph( m_worker, threadData[idx], thread->samples ); + if( thread->ctx->type == ZoneContextType::CPU ) + { + const Vector& sampleData = static_cast( thread )->samples; + BuildFlameGraph( m_worker, threadData[idx], sampleData ); + } } ); idx++; } diff --git a/profiler/src/profiler/TracyView_FrameOverview.cpp b/profiler/src/profiler/TracyView_FrameOverview.cpp index bdb9b9e352..3c9aeeb9ab 100644 --- a/profiler/src/profiler/TracyView_FrameOverview.cpp +++ b/profiler/src/profiler/TracyView_FrameOverview.cpp @@ -259,9 +259,9 @@ void View::DrawFrames() int i = 0, idx = 0; #ifndef TRACY_NO_STATISTICS - if( m_worker.AreSourceLocationZonesReady() && m_findZone.show && m_findZone.showZoneInFrames && !m_findZone.match.empty() ) + if( m_worker.GetDefaultCtx().AreSourceLocationZonesReady() && m_findZone.show && m_findZone.showZoneInFrames && !m_findZone.match.empty() ) { - auto& zoneData = m_worker.GetZonesForSourceLocation( m_findZone.match[m_findZone.selMatch] ); + auto& zoneData = m_worker.GetDefaultCtx().GetZonesForSourceLocation( m_findZone.match[m_findZone.selMatch] ); zoneData.zones.ensure_sorted(); auto begin = zoneData.zones.begin(); while( i < onScreen && m_vd.frameStart + idx < total ) diff --git a/profiler/src/profiler/TracyView_GpuTimeline.cpp b/profiler/src/profiler/TracyView_GpuTimeline.cpp index 4b692d1c2c..78622f6633 100644 --- a/profiler/src/profiler/TracyView_GpuTimeline.cpp +++ b/profiler/src/profiler/TracyView_GpuTimeline.cpp @@ -36,17 +36,17 @@ bool View::DrawGpu( const TimelineContext& ctx, const GpuCtxData& gpu, int& offs for( auto& td : gpu.threadData ) { - auto& tl = td.second.timeline; + auto& tl = td.second->timeline; assert( !tl.empty() ); if( tl.is_magic() ) { - auto& tlm = *(Vector*)&tl; - if( tlm.front().GpuStart() >= 0 ) + auto& tlm = *(Vector*)&tl; + if( tlm.front().Start() >= 0 ) { - const auto begin = tlm.front().GpuStart(); + const auto begin = tlm.front().Start(); const auto drift = GpuDrift( &gpu ); if( !singleThread ) offset += sstep; - const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, gpu.thread, yMin, yMax, begin, drift ); + const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, *td.second, yMin, yMax, begin, drift ); if( partDepth != 0 ) { if( !singleThread ) @@ -68,12 +68,12 @@ bool View::DrawGpu( const TimelineContext& ctx, const GpuCtxData& gpu, int& offs } else { - if( tl.front()->GpuStart() >= 0 ) + if( tl.front()->Start() >= 0 ) { - const auto begin = tl.front()->GpuStart(); + const auto begin = tl.front()->Start(); const auto drift = GpuDrift( &gpu ); if( !singleThread ) offset += sstep; - const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, gpu.thread, yMin, yMax, begin, drift ); + const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, *td.second, yMin, yMax, begin, drift ); if( partDepth != 0 ) { if( !singleThread ) @@ -97,7 +97,7 @@ bool View::DrawGpu( const TimelineContext& ctx, const GpuCtxData& gpu, int& offs return depth != 0; } -int View::DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) +int View::DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, const ThreadData& thread, float yMin, float yMax, int64_t begin, int drift ) { const auto ty = ImGui::GetTextLineHeight(); const auto ostep = ty + 1; @@ -108,38 +108,38 @@ int View::DispatchGpuZoneLevel( const Vector>& vec, bool hov { if( vec.is_magic() ) { - return DrawGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return DrawGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); } else { - return DrawGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return DrawGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); } } else { if( vec.is_magic() ) { - return SkipGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return SkipGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); } else { - return SkipGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return SkipGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); } } } template -int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) +int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, const ThreadData& thread, float yMin, float yMax, int64_t begin, int drift ) { // cast to uint64_t, so that unended zones (end = -1) are still drawn - auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuEnd(), begin, drift ) < (uint64_t)r; } ); + auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); if( it == vec.end() ) return depth; Adapter a; - const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuStart(), begin, drift ) < (uint64_t)r; } ); + const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).Start(), begin, drift ) < (uint64_t)r; } ); if( it == zitend ) return depth; - if( AdjustGpuTime( a(*(zitend-1)).GpuEnd(), begin, drift ) < m_vd.zvStart ) return depth; + if( AdjustGpuTime( a(*(zitend-1)).End(), begin, drift ) < m_vd.zvStart ) return depth; const auto w = ImGui::GetContentRegionAvail().x - 1; const auto ty = ImGui::GetTextLineHeight(); @@ -156,12 +156,12 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, auto& ev = a(*it); auto end = m_worker.GetZoneEnd( ev ); if( end == std::numeric_limits::max() ) break; - const auto start = AdjustGpuTime( ev.GpuStart(), begin, drift ); + const auto start = AdjustGpuTime( ev.Start(), begin, drift ); end = AdjustGpuTime( end, begin, drift ); const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 ); if( zsz < MinVisSize ) { - const auto color = GetZoneColor( ev ); + const auto color = GetZoneColor( ev, thread.id, depth ); const auto MinVisNs = MinVisSize * nspx; int num = 0; const auto px0 = ( start - m_vd.zvStart ) * pxns; @@ -171,7 +171,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, for(;;) { const auto prevIt = it; - it = std::lower_bound( it, zitend, std::max( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuEnd(), begin, drift ) < (uint64_t)r; } ); + it = std::lower_bound( it, zitend, std::max( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); if( it == prevIt ) ++it; num += std::distance( prevIt, it ); if( it == zitend ) break; @@ -202,8 +202,8 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, } else { - const auto zoneThread = thread != 0 ? thread : m_worker.DecompressThread( ev.Thread() ); - ZoneTooltip( ev ); + const auto zoneThread = thread.id != 0 ? thread.id : GetZoneThread( ev ); + ZoneTooltip( ev, thread ); if( IsMouseClicked( 2 ) && rend - start > 0 ) { @@ -211,12 +211,13 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, } if( IsMouseClicked( 0 ) ) { - ShowZoneInfo( ev, zoneThread ); + ShowZoneInfo( ev ); } m_gpuThread = zoneThread; - m_gpuStart = ev.CpuStart(); - m_gpuEnd = ev.CpuEnd(); + const auto zoneExtra = m_worker.GetZoneExtra( ev ); + m_gpuStart = zoneExtra.otherStart.Val(); + m_gpuEnd = zoneExtra.otherEnd.Val(); } } const auto tmp = RealToString( num ); @@ -231,7 +232,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, { if( ev.Child() >= 0 ) { - const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + const auto d = DispatchGpuZoneLevel( m_worker.GetZoneChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); if( d > maxdepth ) maxdepth = d; } @@ -242,7 +243,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const auto pr1 = ( end - m_vd.zvStart ) * pxns; const auto px0 = std::max( pr0, -10.0 ); const auto px1 = std::max( { std::min( pr1, double( w + 10 ) ), px0 + pxns * 0.5, px0 + MinVisSize } ); - const auto zoneColor = GetZoneColorData( ev ); + const auto zoneColor = GetZoneColorData( ev, thread.id, depth, 0 ); draw->AddRectFilled( wpos + ImVec2( px0, offset ), wpos + ImVec2( px1, offset + tsz.y ), zoneColor.color ); if( zoneColor.highlight ) { @@ -270,7 +271,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, DrawTextContrast( draw, wpos + ImVec2( std::max( std::max( 0., px0 ), std::min( double( w - tsz.x ), x ) ), offset ), 0xFFFFFFFF, zoneName ); ImGui::PopClipRect(); } - else if( ev.GpuStart() == ev.GpuEnd() ) + else if( ev.Start() == ev.End() ) { DrawTextContrast( draw, wpos + ImVec2( px0 + ( px1 - px0 - tsz.x ) * 0.5, offset ), 0xFFFFFFFF, zoneName ); } @@ -288,8 +289,8 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, if( hover && ImGui::IsMouseHoveringRect( wpos + ImVec2( px0, offset ), wpos + ImVec2( px1, offset + tsz.y + 1 ) ) ) { - const auto zoneThread = thread != 0 ? thread : m_worker.DecompressThread( ev.Thread() ); - ZoneTooltip( ev ); + const auto zoneThread = thread.id != 0 ? thread.id : GetZoneThread( ev ); + ZoneTooltip( ev, thread ); if( !m_zoomAnim.active && IsMouseClicked( 2 ) ) { @@ -297,12 +298,13 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, } if( IsMouseClicked( 0 ) ) { - ShowZoneInfo( ev, zoneThread ); + ShowZoneInfo( ev ); } m_gpuThread = zoneThread; - m_gpuStart = ev.CpuStart(); - m_gpuEnd = ev.CpuEnd(); + const auto zoneExtra = m_worker.GetZoneExtra( ev ); + m_gpuStart = zoneExtra.otherStart.Val(); + m_gpuEnd = zoneExtra.otherEnd.Val(); } ++it; @@ -312,17 +314,17 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, } template -int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) +int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, const ThreadData& thread, float yMin, float yMax, int64_t begin, int drift ) { // cast to uint64_t, so that unended zones (end = -1) are still drawn - auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuEnd(), begin, drift ) < (uint64_t)r; } ); + auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); if( it == vec.end() ) return depth; Adapter a; - const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuStart(), begin, drift ) < (uint64_t)r; } ); + const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).Start(), begin, drift ) < (uint64_t)r; } ); if( it == zitend ) return depth; - if( AdjustGpuTime( a(*(zitend-1)).GpuEnd(), begin, drift ) < m_vd.zvStart ) return depth; + if( AdjustGpuTime( a(*(zitend-1)).End(), begin, drift ) < m_vd.zvStart ) return depth; depth++; int maxdepth = depth; @@ -332,7 +334,7 @@ int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, auto& ev = a(*it); auto end = m_worker.GetZoneEnd( ev ); if( end == std::numeric_limits::max() ) break; - const auto start = AdjustGpuTime( ev.GpuStart(), begin, drift ); + const auto start = AdjustGpuTime( ev.Start(), begin, drift ); end = AdjustGpuTime( end, begin, drift ); const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 ); if( zsz < MinVisSize ) @@ -343,7 +345,7 @@ int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, for(;;) { const auto prevIt = it; - it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuEnd(), begin, drift ) < (uint64_t)r; } ); + it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); if( it == prevIt ) ++it; if( it == zitend ) break; const auto nend = AdjustGpuTime( m_worker.GetZoneEnd( a(*it) ), begin, drift ); @@ -357,7 +359,7 @@ int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, { if( ev.Child() >= 0 ) { - const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + const auto d = DispatchGpuZoneLevel( m_worker.GetZoneChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); if( d > maxdepth ) maxdepth = d; } ++it; diff --git a/profiler/src/profiler/TracyView_Memory.cpp b/profiler/src/profiler/TracyView_Memory.cpp index 7689683e56..6d4a4c53cc 100644 --- a/profiler/src/profiler/TracyView_Memory.cpp +++ b/profiler/src/profiler/TracyView_Memory.cpp @@ -597,11 +597,11 @@ void View::DrawMemoryAllocWindow() bool sep = false; auto zoneAlloc = FindZoneAtTime( tidAlloc, ev.TimeAlloc() ); - if( zoneAlloc ) + if( zoneAlloc.first ) { ImGui::Separator(); sep = true; - const auto& srcloc = m_worker.GetSourceLocation( zoneAlloc->SrcLoc() ); + const auto& srcloc = m_worker.GetSourceLocation( zoneAlloc.first->SrcLoc() ); const auto txt = srcloc.name.active ? m_worker.GetString( srcloc.name ) : m_worker.GetString( srcloc.function ); ImGui::PushID( idx++ ); TextFocused( "Zone alloc:", txt ); @@ -609,41 +609,41 @@ void View::DrawMemoryAllocWindow() ImGui::PopID(); if( ImGui::IsItemClicked() ) { - ShowZoneInfo( *zoneAlloc ); + ShowZoneInfo( *zoneAlloc.first ); } if( hover ) { - m_zoneHighlight = zoneAlloc; + m_zoneHighlight = zoneAlloc.first; if( IsMouseClicked( 2 ) ) { - ZoomToZone( *zoneAlloc ); + ZoomToZone( *zoneAlloc.first ); } - ZoneTooltip( *zoneAlloc ); + ZoneTooltip( *zoneAlloc.first, *zoneAlloc.second ); } } if( ev.TimeFree() >= 0 ) { auto zoneFree = FindZoneAtTime( tidFree, ev.TimeFree() ); - if( zoneFree ) + if( zoneFree.first ) { if( !sep ) ImGui::Separator(); - const auto& srcloc = m_worker.GetSourceLocation( zoneFree->SrcLoc() ); + const auto& srcloc = m_worker.GetSourceLocation( zoneFree.first->SrcLoc() ); const auto txt = srcloc.name.active ? m_worker.GetString( srcloc.name ) : m_worker.GetString( srcloc.function ); TextFocused( "Zone free:", txt ); auto hover = ImGui::IsItemHovered(); if( ImGui::IsItemClicked() ) { - ShowZoneInfo( *zoneFree ); + ShowZoneInfo( *zoneFree.first ); } if( hover ) { - m_zoneHighlight = zoneFree; + m_zoneHighlight = zoneFree.first; if( IsMouseClicked( 2 ) ) { - ZoomToZone( *zoneFree ); + ZoomToZone( *zoneFree.first ); } - ZoneTooltip( *zoneFree ); + ZoneTooltip( *zoneFree.first, *zoneAlloc.second ); } if( zoneAlloc == zoneFree ) { @@ -810,30 +810,30 @@ void View::ListMemData( std::vector& vec, const std::functionThreadAlloc() ), v->TimeAlloc() ); - if( !zone ) + if( !zone.first ) { ImGui::TextUnformatted( "-" ); } else { - const auto& srcloc = m_worker.GetSourceLocation( zone->SrcLoc() ); + const auto& srcloc = m_worker.GetSourceLocation( zone.first->SrcLoc() ); const auto txt = srcloc.name.active ? m_worker.GetString( srcloc.name ) : m_worker.GetString( srcloc.function ); ImGui::PushID( idx++ ); - auto sel = ImGui::Selectable( txt, m_zoneInfoWindow == zone ); + auto sel = ImGui::Selectable( txt, m_zoneInfoWindow == zone.first ); auto hover = ImGui::IsItemHovered(); ImGui::PopID(); if( sel ) { - ShowZoneInfo( *zone ); + ShowZoneInfo( *zone.first ); } if( hover ) { - m_zoneHighlight = zone; + m_zoneHighlight = zone.first; if( IsMouseClicked( 2 ) ) { - ZoomToZone( *zone ); + ZoomToZone( *zone.first ); } - ZoneTooltip( *zone ); + ZoneTooltip( *zone.first, *zone.second ); } } ImGui::TableNextColumn(); @@ -844,40 +844,40 @@ void View::ListMemData( std::vector& vec, const std::functionThreadFree() ), v->TimeFree() ); - if( !zoneFree ) + if( !zoneFree.first ) { ImGui::TextUnformatted( "-" ); } else { - const auto& srcloc = m_worker.GetSourceLocation( zoneFree->SrcLoc() ); + const auto& srcloc = m_worker.GetSourceLocation( zoneFree.first->SrcLoc() ); const auto txt = srcloc.name.active ? m_worker.GetString( srcloc.name ) : m_worker.GetString( srcloc.function ); ImGui::PushID( idx++ ); bool sel; if( zoneFree == zone ) { ImGui::PushStyleColor( ImGuiCol_Text, ImVec4( 1.f, 1.f, 0.6f, 1.f ) ); - sel = ImGui::Selectable( txt, m_zoneInfoWindow == zoneFree ); + sel = ImGui::Selectable( txt, m_zoneInfoWindow == zoneFree.first ); ImGui::PopStyleColor( 1 ); } else { - sel = ImGui::Selectable( txt, m_zoneInfoWindow == zoneFree ); + sel = ImGui::Selectable( txt, m_zoneInfoWindow == zoneFree.first ); } auto hover = ImGui::IsItemHovered(); ImGui::PopID(); if( sel ) { - ShowZoneInfo( *zoneFree ); + ShowZoneInfo( *zoneFree.first ); } if( hover ) { - m_zoneHighlight = zoneFree; + m_zoneHighlight = zoneFree.first; if( IsMouseClicked( 2 ) ) { - ZoomToZone( *zoneFree ); + ZoomToZone( *zoneFree.first ); } - ZoneTooltip( *zoneFree ); + ZoneTooltip( *zoneFree.first, *zoneFree.second ); } } } diff --git a/profiler/src/profiler/TracyView_Navigation.cpp b/profiler/src/profiler/TracyView_Navigation.cpp index 75f5eaea29..3aef3b54d1 100644 --- a/profiler/src/profiler/TracyView_Navigation.cpp +++ b/profiler/src/profiler/TracyView_Navigation.cpp @@ -7,33 +7,26 @@ void View::ZoomToZone( const ZoneEvent& ev ) { const auto end = m_worker.GetZoneEnd( ev ); if( end - ev.Start() <= 0 ) return; - ZoomToRange( ev.Start(), end ); -} - -void View::ZoomToZone( const GpuEvent& ev ) -{ - const auto end = m_worker.GetZoneEnd( ev ); - if( end - ev.GpuStart() <= 0 ) return; - auto ctx = GetZoneCtx( ev ); - if( !ctx ) + auto td = GetZoneThreadData( ev ); + auto ctx = td->ctx; + if ( !ctx || ctx->type == ZoneContextType::CPU) { - ZoomToRange( ev.GpuStart(), end ); + ZoomToRange( ev.Start(), end ); } else { - const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); - assert( td != ctx->threadData.end() ); + assert( td ); int64_t begin; - if( td->second.timeline.is_magic() ) + if( td->timeline.is_magic() ) { - begin = ((Vector*)&td->second.timeline)->front().GpuStart(); + begin = ( (Vector*)&td->timeline )->front().Start(); } else { - begin = td->second.timeline.front()->GpuStart(); + begin = td->timeline.front()->Start(); } const auto drift = GpuDrift( ctx ); - ZoomToRange( AdjustGpuTime( ev.GpuStart(), begin, drift ), AdjustGpuTime( end, begin, drift ) ); + ZoomToRange( AdjustGpuTime( ev.Start(), begin, drift ), AdjustGpuTime( end, begin, drift ) ); } } diff --git a/profiler/src/profiler/TracyView_Options.cpp b/profiler/src/profiler/TracyView_Options.cpp index 45ca4fac63..b9893e0031 100644 --- a/profiler/src/profiler/TracyView_Options.cpp +++ b/profiler/src/profiler/TracyView_Options.cpp @@ -102,7 +102,7 @@ void View::DrawOptions() DefaultMarker(default_markers_active); } - const auto& gpuData = m_worker.GetGpuData(); + const auto& gpuData = m_worker.GetCtxData(); if( !gpuData.empty() ) { ImGui::Separator(); @@ -112,7 +112,11 @@ void View::DrawOptions() const auto expand = ImGui::TreeNode( "GPU zones" ); ImGui::SameLine(); size_t visibleGpu = 0; - for( const auto& gd : gpuData ) if( m_tc.GetItem( gd ).IsVisible() ) visibleGpu++; + for( const auto& gd : gpuData ) + { + if( gd->type == ZoneContextType::CPU ) continue; + if( m_tc.GetItem( gd ).IsVisible() ) visibleGpu++; + } if( visibleGpu == gpuData.size() ) { ImGui::TextDisabled( "(%zu)", gpuData.size() ); @@ -125,7 +129,8 @@ void View::DrawOptions() { for( size_t i=0; ithreadData.begin()->second.timeline; + if ( gpuData[i]->type == ZoneContextType::CPU ) continue; + const auto& timeline = gpuData[i]->threadData.begin()->second->timeline; m_tc.GetItem( gpuData[i] ).VisibilityCheckbox(); ImGui::SameLine(); if( gpuData[i]->threadData.size() == 1 ) @@ -140,12 +145,12 @@ void View::DrawOptions() { char buf[64]; auto& item = (TimelineItemGpu&)( m_tc.GetItem( gpuData[i] ) ); - sprintf( buf, "%s context %i", GpuContextNames[(int)gpuData[i]->type], item.GetIdx() ); + sprintf( buf, "%s context %i", ZoneContextNames[(int)gpuData[i]->type], item.GetIdx() ); ImGui::PushFont( g_fonts.normal, FontSmall ); ImGui::TextUnformatted( buf ); ImGui::PopFont(); } - if( !gpuData[i]->hasCalibration ) + if( !static_cast(gpuData[i])->hasCalibration ) { ImGui::TreePush( (void*)nullptr ); auto& drift = GpuDrift( gpuData[i] ); @@ -161,10 +166,10 @@ void View::DrawOptions() size_t lastidx = 0; if( timeline.is_magic() ) { - auto& tl = *((Vector*)&timeline); + auto& tl = *((Vector*)&timeline); for( size_t j=tl.size()-1; j > 0; j-- ) { - if( tl[j].GpuEnd() >= 0 ) + if( tl[j].End() >= 0 ) { lastidx = j; break; @@ -175,7 +180,7 @@ void View::DrawOptions() { for( size_t j=timeline.size()-1; j > 0; j-- ) { - if( timeline[j]->GpuEnd() >= 0 ) + if( timeline[j]->End() >= 0 ) { lastidx = j; break; @@ -191,14 +196,16 @@ void View::DrawOptions() size_t idx = 0; if( timeline.is_magic() ) { - auto& tl = *((Vector*)&timeline); + auto& tl = *((Vector*)&timeline); do { const auto p0 = dist( gen ); const auto p1 = dist( gen ); if( p0 != p1 ) { - slopes[idx++] = float( 1.0 - double( tl[p1].GpuStart() - tl[p0].GpuStart() ) / double( tl[p1].CpuStart() - tl[p0].CpuStart() ) ); + const auto e1 = m_worker.GetZoneExtra( tl[p1] ); + const auto e0 = m_worker.GetZoneExtra( tl[p0] ); + slopes[idx++] = float( 1.0 - double( tl[p1].Start() - tl[p0].Start() ) / double( e1.otherStart.Val() - e0.otherStart.Val() ) ); } } while( idx < NumSlopes ); @@ -211,7 +218,9 @@ void View::DrawOptions() const auto p1 = dist( gen ); if( p0 != p1 ) { - slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) ); + const auto e1 = m_worker.GetZoneExtra( *timeline[p1].get() ); + const auto e0 = m_worker.GetZoneExtra( *timeline[p0].get() ); + slopes[idx++] = float( 1.0 - double( timeline[p1]->Start() - timeline[p0]->Start() ) / double( e1.otherStart.Val() - e0.otherStart.Val() ) ); } } while( idx < NumSlopes ); diff --git a/profiler/src/profiler/TracyView_Statistics.cpp b/profiler/src/profiler/TracyView_Statistics.cpp index f8101897b2..238a537cac 100644 --- a/profiler/src/profiler/TracyView_Statistics.cpp +++ b/profiler/src/profiler/TracyView_Statistics.cpp @@ -45,7 +45,12 @@ void View::DrawStatistics() ImGui::TextWrapped( "Collection of statistical data is disabled in this build." ); ImGui::TextWrapped( "Rebuild without the TRACY_NO_STATISTICS macro to enable statistics view." ); #else - if( !m_worker.AreSourceLocationZonesReady() && ( !m_worker.AreCallstackSamplesReady() || m_worker.GetCallstackSampleCount() == 0 ) ) + ContextCombo( m_statCtxName, &m_statCtx ); + auto ctx = m_worker.GetCtxData()[m_statCtx]; + + ImGui::SameLine(); + + if( !ctx->AreSourceLocationZonesReady() && ( !m_worker.AreCallstackSamplesReady() || m_worker.GetCallstackSampleCount() == 0 ) ) { const auto ty = ImGui::GetTextLineHeight(); ImGui::PushFont( g_fonts.normal, FontBig ); @@ -78,13 +83,6 @@ void View::DrawStatistics() ImGui::RadioButton( ICON_FA_PUZZLE_PIECE " Symbols", &m_statMode, 1 ); } } - if( m_worker.GetGpuZoneCount() > 0 ) - { - ImGui::SameLine(); - ImGui::Spacing(); - ImGui::SameLine(); - ImGui::RadioButton( ICON_FA_EYE " GPU", &m_statMode, 2 ); - } ImGui::SameLine(); ImGui::Spacing(); ImGui::SameLine(); @@ -99,7 +97,7 @@ void View::DrawStatistics() bool copySrclocsToClipboard = false; if( m_statMode == 0 ) { - if( !m_worker.AreSourceLocationZonesReady() ) + if( !ctx->AreSourceLocationZonesReady() ) { ImGui::Spacing(); ImGui::Separator(); @@ -111,7 +109,7 @@ void View::DrawStatistics() } const auto filterActive = m_statisticsFilter.IsActive(); - auto& slz = m_worker.GetSourceLocationZones(); + auto& slz = ctx->GetSourceLocationZones(); srcloc.reserve( slz.size() ); uint32_t slzcnt = 0; if( m_statRange.active ) @@ -321,6 +319,8 @@ void View::DrawStatistics() } else { + assert(0); + /* assert( m_statMode == 2 ); if( !m_worker.AreGpuSourceLocationZonesReady() ) { @@ -456,6 +456,7 @@ void View::DrawStatistics() TextFocused( "Visible zones:", RealToString( srcloc.size() ) ); ImGui::SameLine(); copySrclocsToClipboard = ClipboardButton(); + */ } ImGui::Separator(); diff --git a/profiler/src/profiler/TracyView_Timeline.cpp b/profiler/src/profiler/TracyView_Timeline.cpp index 803fcb69b7..e1e546ca70 100644 --- a/profiler/src/profiler/TracyView_Timeline.cpp +++ b/profiler/src/profiler/TracyView_Timeline.cpp @@ -358,43 +358,49 @@ void View::DrawTimeline() const auto to = 9.f; const auto th = ( ty - to ) * sqrt( 3 ) * 0.5; - if( m_vd.drawGpuZones ) + for( auto& v : m_worker.GetCtxData() ) { - for( auto& v : m_worker.GetGpuData() ) + // TODO: Consolidate handling here. + if( m_vd.drawGpuZones && v->type != ZoneContextType::CPU ) { - m_tc.AddItem( v ); + m_tc.AddItem( static_cast( v ) ); } - } - if( m_vd.drawCpuData && m_worker.HasContextSwitches() ) - { - static char uptr; - m_tc.AddItem( &uptr ); - } - if( m_vd.drawZones ) - { - const auto& threadData = m_worker.GetThreadData(); - if( threadData.size() != m_threadOrder.size() ) + else if( v->type == ZoneContextType::CPU ) { - m_threadOrder.reserve( threadData.size() ); - // Only new threads are in the end of the worker's ThreadData vector. - // Threads which get reordered by received thread hints are not new, yet removed from m_threadOrder. - // Therefore, those are kept in the m_threadReinsert vector. As such, we will gather first threads from the - // reinsert vector, and afterwards the remaining ones must be new (and thus found at the end of threadData). - size_t numReinsert = m_threadReinsert.size(); - size_t numNew = threadData.size() - m_threadOrder.size() - numReinsert; - for( size_t i = 0; i < numReinsert + numNew; i++ ) + if( m_vd.drawCpuData && m_worker.HasContextSwitches() ) { - const ThreadData *td = i < numReinsert ? m_threadReinsert[i] : threadData[m_threadOrder.size()]; - auto it = std::find_if( m_threadOrder.begin(), m_threadOrder.end(), [td]( const auto t ) { return td->groupHint < t->groupHint; } ); - m_threadOrder.insert( it, td ); + static char uptr; + m_tc.AddItem( &uptr ); + } + + if( m_vd.drawZones ) + { + const auto& threadData = v->threads; + if( threadData.size() != m_threadOrder.size() ) + { + m_threadOrder.reserve( threadData.size() ); + // Only new threads are in the end of the worker's ThreadData vector. + // Threads which get reordered by received thread hints are not new, yet removed from m_threadOrder. + // Therefore, those are kept in the m_threadReinsert vector. As such, we will gather first threads from the + // reinsert vector, and afterwards the remaining ones must be new (and thus found at the end of threadData). + size_t numReinsert = m_threadReinsert.size(); + size_t numNew = threadData.size() - m_threadOrder.size() - numReinsert; + for( size_t i = 0; i < numReinsert + numNew; i++ ) + { + const ThreadData* td = i < numReinsert ? m_threadReinsert[i] : threadData[m_threadOrder.size()]; + auto it = std::find_if( m_threadOrder.begin(), m_threadOrder.end(), [td]( const auto t ) { return td->groupHint < t->groupHint; } ); + m_threadOrder.insert( it, td ); + } + m_threadReinsert.clear(); + } + for( const auto& v : m_threadOrder ) + { + m_tc.AddItem( v ); + } } - m_threadReinsert.clear(); - } - for( const auto& v : m_threadOrder ) - { - m_tc.AddItem( v ); } } + if( m_vd.drawPlots ) { for( const auto& v : m_worker.GetPlots() ) @@ -471,13 +477,6 @@ void View::DrawTimeline() draw->AddRectFilled( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x228888DD ); draw->AddRect( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x448888DD ); } - if( m_gpuInfoWindow ) - { - const auto px0 = ( m_gpuInfoWindow->CpuStart() - m_vd.zvStart ) * pxns; - const auto px1 = std::max( px0 + std::max( 1.0, pxns * 0.5 ), ( m_gpuInfoWindow->CpuEnd() - m_vd.zvStart ) * pxns ); - draw->AddRectFilled( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x2288DD88 ); - draw->AddRect( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x4488DD88 ); - } const auto scale = GetScale(); if( m_findZone.range.active && ( m_findZone.show || m_showRanges ) ) diff --git a/profiler/src/profiler/TracyView_Utility.cpp b/profiler/src/profiler/TracyView_Utility.cpp index cc08264a9d..2fecf4c1d0 100644 --- a/profiler/src/profiler/TracyView_Utility.cpp +++ b/profiler/src/profiler/TracyView_Utility.cpp @@ -77,13 +77,6 @@ uint32_t View::GetZoneColor( const ZoneEvent& ev, uint64_t thread, int depth ) } } -uint32_t View::GetZoneColor( const GpuEvent& ev ) -{ - const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); - const auto color = srcloc.color; - return color != 0 ? ( color | 0xFF000000 ) : 0xFF222288; -} - View::ZoneColorData View::GetZoneColorData( const ZoneEvent& ev, uint64_t thread, int depth, uint32_t inheritedColor ) { ZoneColorData ret; @@ -136,38 +129,13 @@ View::ZoneColorData View::GetZoneColorData( const ZoneEvent& ev, uint64_t thread return ret; } -View::ZoneColorData View::GetZoneColorData( const GpuEvent& ev ) -{ - ZoneColorData ret; - const auto color = GetZoneColor( ev ); - ret.color = color; - if( m_gpuInfoWindow == &ev ) - { - ret.accentColor = 0xFF44DD44; - ret.thickness = 3.f; - ret.highlight = true; - } - else if( m_gpuHighlight == &ev ) - { - ret.accentColor = 0xFF4444FF; - ret.thickness = 3.f; - ret.highlight = true; - } - else - { - ret.accentColor = HighlightColor( color ); - ret.thickness = 1.f; - ret.highlight = false; - } - return ret; -} - - -const ZoneEvent* View::FindZoneAtTime( uint64_t thread, int64_t time ) const +const pair View::FindZoneAtTime( uint64_t thread, int64_t time ) const { + for( const auto ctx : m_worker.GetCtxData() ) + { // TODO add thread rev-map ThreadData* td = nullptr; - for( const auto& t : m_worker.GetThreadData() ) + for( const auto& t : ctx->threads ) { if( t->id == thread ) { @@ -175,10 +143,10 @@ const ZoneEvent* View::FindZoneAtTime( uint64_t thread, int64_t time ) const break; } } - if( !td ) return nullptr; + if( !td ) return { nullptr, nullptr }; const Vector>* timeline = &td->timeline; - if( timeline->empty() ) return nullptr; + if( timeline->empty() ) return { nullptr, nullptr }; const ZoneEvent* ret = nullptr; for(;;) { @@ -187,21 +155,23 @@ const ZoneEvent* View::FindZoneAtTime( uint64_t thread, int64_t time ) const auto vec = (Vector*)timeline; auto it = std::upper_bound( vec->begin(), vec->end(), time, [] ( const auto& l, const auto& r ) { return l < r.Start(); } ); if( it != vec->begin() ) --it; - if( it->Start() > time || ( it->IsEndValid() && it->End() < time ) ) return ret; + if( it->Start() > time || ( it->IsEndValid() && it->End() < time ) ) return { ret, td }; ret = it; - if( !it->HasChildren() ) return ret; + if( !it->HasChildren() ) return { ret, td }; timeline = &m_worker.GetZoneChildren( it->Child() ); } else { auto it = std::upper_bound( timeline->begin(), timeline->end(), time, [] ( const auto& l, const auto& r ) { return l < r->Start(); } ); if( it != timeline->begin() ) --it; - if( (*it)->Start() > time || ( (*it)->IsEndValid() && (*it)->End() < time ) ) return ret; + if( (*it)->Start() > time || ( (*it)->IsEndValid() && (*it)->End() < time ) ) return { ret, td }; ret = *it; - if( !(*it)->HasChildren() ) return ret; + if( !(*it)->HasChildren() ) return { ret, td }; timeline = &m_worker.GetZoneChildren( (*it)->Child() ); } } + } + return { nullptr, nullptr }; } const ZoneEvent* View::GetZoneChild( const ZoneEvent& zone, int64_t time ) const @@ -227,32 +197,73 @@ const ZoneEvent* View::GetZoneChild( const ZoneEvent& zone, int64_t time ) const const ZoneEvent* View::GetZoneParent( const ZoneEvent& zone ) const { -#ifndef TRACY_NO_STATISTICS - if( m_worker.AreSourceLocationZonesReady() ) + for( const auto& ctx : m_worker.GetCtxData() ) { - auto& slz = m_worker.GetZonesForSourceLocation( zone.SrcLoc() ); - if( !slz.zones.empty() && slz.zones.is_sorted() ) +#ifndef TRACY_NO_STATISTICS + if( ctx->AreSourceLocationZonesReady() ) { - auto it = std::lower_bound( slz.zones.begin(), slz.zones.end(), zone.Start(), [] ( const auto& lhs, const auto& rhs ) { return lhs.Zone()->Start() < rhs; } ); - if( it != slz.zones.end() && it->Zone() == &zone ) + auto& slz = ctx->GetZonesForSourceLocation( zone.SrcLoc() ); + if( !slz.zones.empty() && slz.zones.is_sorted() ) { - return GetZoneParent( zone, m_worker.DecompressThread( it->Thread() ) ); + auto it = std::lower_bound( slz.zones.begin(), slz.zones.end(), zone.Start(), []( const auto& lhs, const auto& rhs ) { return lhs.Zone()->Start() < rhs; } ); + if( it != slz.zones.end() && it->Zone() == &zone ) + { + return GetZoneParent( zone, m_worker.DecompressThread( it->Thread() ) ); + } } } - } #endif - for( const auto& thread : m_worker.GetThreadData() ) + for( const auto& thread : ctx->threads ) + { + const ZoneEvent* parent = nullptr; + const Vector>* timeline = &thread->timeline; + if( timeline->empty() ) continue; + for( ;; ) + { + if( timeline->is_magic() ) + { + auto vec = (Vector*)timeline; + auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), []( const auto& l, const auto& r ) { return l < r.Start(); } ); + if( it != vec->begin() ) --it; + if( zone.IsEndValid() && it->Start() > zone.End() ) break; + if( it == &zone ) return parent; + if( !it->HasChildren() ) break; + parent = it; + timeline = &m_worker.GetZoneChildren( parent->Child() ); + } + else + { + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), []( const auto& l, const auto& r ) { return l < r->Start(); } ); + if( it != timeline->begin() ) --it; + if( zone.IsEndValid() && ( *it )->Start() > zone.End() ) break; + if( *it == &zone ) return parent; + if( !( *it )->HasChildren() ) break; + parent = *it; + timeline = &m_worker.GetZoneChildren( parent->Child() ); + } + } + } + } + return nullptr; +} + +const ZoneEvent* View::GetZoneParent( const ZoneEvent& zone, uint64_t tid ) const +{ + for( const auto& ctx : m_worker.GetCtxData() ) { + auto it = ctx->threadData.find( tid ); + if( it == ctx->threadData.end() ) continue; + const auto thread = it->second; const ZoneEvent* parent = nullptr; const Vector>* timeline = &thread->timeline; - if( timeline->empty() ) continue; - for(;;) + if( timeline->empty() ) return nullptr; + for( ;; ) { if( timeline->is_magic() ) { auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return l < r.Start(); } ); + auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), []( const auto& l, const auto& r ) { return l < r.Start(); } ); if( it != vec->begin() ) --it; if( zone.IsEndValid() && it->Start() > zone.End() ) break; if( it == &zone ) return parent; @@ -262,11 +273,11 @@ const ZoneEvent* View::GetZoneParent( const ZoneEvent& zone ) const } else { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return l < r->Start(); } ); + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), []( const auto& l, const auto& r ) { return l < r->Start(); } ); if( it != timeline->begin() ) --it; - if( zone.IsEndValid() && (*it)->Start() > zone.End() ) break; + if( zone.IsEndValid() && ( *it )->Start() > zone.End() ) break; if( *it == &zone ) return parent; - if( !(*it)->HasChildren() ) break; + if( !( *it )->HasChildren() ) break; parent = *it; timeline = &m_worker.GetZoneChildren( parent->Child() ); } @@ -275,57 +286,26 @@ const ZoneEvent* View::GetZoneParent( const ZoneEvent& zone ) const return nullptr; } -const ZoneEvent* View::GetZoneParent( const ZoneEvent& zone, uint64_t tid ) const -{ - const auto thread = m_worker.GetThreadData( tid ); - const ZoneEvent* parent = nullptr; - const Vector>* timeline = &thread->timeline; - if( timeline->empty() ) return nullptr; - for(;;) - { - if( timeline->is_magic() ) - { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return l < r.Start(); } ); - if( it != vec->begin() ) --it; - if( zone.IsEndValid() && it->Start() > zone.End() ) break; - if( it == &zone ) return parent; - if( !it->HasChildren() ) break; - parent = it; - timeline = &m_worker.GetZoneChildren( parent->Child() ); - } - else - { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return l < r->Start(); } ); - if( it != timeline->begin() ) --it; - if( zone.IsEndValid() && (*it)->Start() > zone.End() ) break; - if( *it == &zone ) return parent; - if( !(*it)->HasChildren() ) break; - parent = *it; - timeline = &m_worker.GetZoneChildren( parent->Child() ); - } - } - return nullptr; -} - bool View::IsZoneReentry( const ZoneEvent& zone ) const { + for( const auto& ctx : m_worker.GetCtxData() ) + { #ifndef TRACY_NO_STATISTICS - if( m_worker.AreSourceLocationZonesReady() ) + if( ctx->AreSourceLocationZonesReady() ) { - auto& slz = m_worker.GetZonesForSourceLocation( zone.SrcLoc() ); + auto& slz = ctx->GetZonesForSourceLocation( zone.SrcLoc() ); if( !slz.zones.empty() && slz.zones.is_sorted() ) { auto it = std::lower_bound( slz.zones.begin(), slz.zones.end(), zone.Start(), [] ( const auto& lhs, const auto& rhs ) { return lhs.Zone()->Start() < rhs; } ); if( it != slz.zones.end() && it->Zone() == &zone ) { - return IsZoneReentry( zone, m_worker.DecompressThread( it->Thread() ) ); + return IsZoneReentry( zone, m_worker.DecompressThread( it->Thread() ), ctx ); } } } #endif - for( const auto& thread : m_worker.GetThreadData() ) + for( const auto& thread : ctx->threads ) { const ZoneEvent* parent = nullptr; const Vector>* timeline = &thread->timeline; @@ -357,12 +337,13 @@ bool View::IsZoneReentry( const ZoneEvent& zone ) const } } } - return false; + } + return false; } -bool View::IsZoneReentry( const ZoneEvent& zone, uint64_t tid ) const +bool View::IsZoneReentry( const ZoneEvent& zone, uint64_t tid, const ZoneContext* ctx ) const { - const auto thread = m_worker.GetThreadData( tid ); + const auto thread = ctx->GetThreadData( tid ); const ZoneEvent* parent = nullptr; const Vector>* timeline = &thread->timeline; if( timeline->empty() ) return false; @@ -395,165 +376,50 @@ bool View::IsZoneReentry( const ZoneEvent& zone, uint64_t tid ) const return false; } -const GpuEvent* View::GetZoneParent( const GpuEvent& zone ) const -{ - for( const auto& ctx : m_worker.GetGpuData() ) - { - for( const auto& td : ctx->threadData ) - { - const GpuEvent* parent = nullptr; - const Vector>* timeline = &td.second.timeline; - if( timeline->empty() ) continue; - for(;;) - { - if( timeline->is_magic() ) - { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.GpuStart(); } ); - if( it != vec->begin() ) --it; - if( zone.GpuEnd() >= 0 && it->GpuStart() > zone.GpuEnd() ) break; - if( it == &zone ) return parent; - if( it->Child() < 0 ) break; - parent = it; - timeline = &m_worker.GetGpuChildren( parent->Child() ); - } - else - { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } ); - if( it != timeline->begin() ) --it; - if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break; - if( *it == &zone ) return parent; - if( (*it)->Child() < 0 ) break; - parent = *it; - timeline = &m_worker.GetGpuChildren( parent->Child() ); - } - } - } - } - return nullptr; -} - const ThreadData* View::GetZoneThreadData( const ZoneEvent& zone ) const { -#ifndef TRACY_NO_STATISTICS - if( m_worker.AreSourceLocationZonesReady() ) - { - auto& slz = m_worker.GetZonesForSourceLocation( zone.SrcLoc() ); - if( !slz.zones.empty() && slz.zones.is_sorted() ) - { - auto it = std::lower_bound( slz.zones.begin(), slz.zones.end(), zone.Start(), [] ( const auto& lhs, const auto& rhs ) { return lhs.Zone()->Start() < rhs; } ); - if( it != slz.zones.end() && it->Zone() == &zone ) - { - return m_worker.GetThreadData( m_worker.DecompressThread( it->Thread() ) ); - } - } - } -#endif - for( const auto& thread : m_worker.GetThreadData() ) + for( const auto& ctx : m_worker.GetCtxData() ) { - const Vector>* timeline = &thread->timeline; - if( timeline->empty() ) continue; - for(;;) - { - if( timeline->is_magic() ) - { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return l < r.Start(); } ); - if( it != vec->begin() ) --it; - if( zone.IsEndValid() && it->Start() > zone.End() ) break; - if( it == &zone ) return thread; - if( !it->HasChildren() ) break; - timeline = &m_worker.GetZoneChildren( it->Child() ); - } - else - { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return l < r->Start(); } ); - if( it != timeline->begin() ) --it; - if( zone.IsEndValid() && (*it)->Start() > zone.End() ) break; - if( *it == &zone ) return thread; - if( !(*it)->HasChildren() ) break; - timeline = &m_worker.GetZoneChildren( (*it)->Child() ); - } - } - } - return nullptr; -} - -uint64_t View::GetZoneThread( const ZoneEvent& zone ) const -{ - auto threadData = GetZoneThreadData( zone ); - return threadData ? threadData->id : 0; -} - -uint64_t View::GetZoneThread( const GpuEvent& zone ) const -{ - if( zone.Thread() == 0 ) - { - for( const auto& ctx : m_worker.GetGpuData() ) +#ifndef TRACY_NO_STATISTICS + if( ctx->AreSourceLocationZonesReady() ) { - if ( ctx->threadData.size() != 1 ) continue; - const Vector>* timeline = &ctx->threadData.begin()->second.timeline; - if( timeline->empty() ) continue; - for(;;) + auto& slz = ctx->GetZonesForSourceLocation( zone.SrcLoc() ); + if( !slz.zones.empty() && slz.zones.is_sorted() ) { - if( timeline->is_magic() ) - { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.GpuStart(); } ); - if( it != vec->begin() ) --it; - if( zone.GpuEnd() >= 0 && it->GpuStart() > zone.GpuEnd() ) break; - if( it == &zone ) return ctx->thread; - if( it->Child() < 0 ) break; - timeline = &m_worker.GetGpuChildren( it->Child() ); - } - else + auto it = std::lower_bound( slz.zones.begin(), slz.zones.end(), zone.Start(), []( const auto& lhs, const auto& rhs ) { return lhs.Zone()->Start() < rhs; } ); + if( it != slz.zones.end() && it->Zone() == &zone ) { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } ); - if( it != timeline->begin() ) --it; - if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break; - if( *it == &zone ) return ctx->thread; - if( (*it)->Child() < 0 ) break; - timeline = &m_worker.GetGpuChildren( (*it)->Child() ); + return ctx->GetThreadData( m_worker.DecompressThread( it->Thread() ) ); } } } - return 0; - } - else - { - return m_worker.DecompressThread( zone.Thread() ); - } -} +#endif -const GpuCtxData* View::GetZoneCtx( const GpuEvent& zone ) const -{ - for( const auto& ctx : m_worker.GetGpuData() ) - { for( const auto& td : ctx->threadData ) { - const Vector>* timeline = &td.second.timeline; + const Vector>* timeline = &td.second->timeline; if( timeline->empty() ) continue; for(;;) { if( timeline->is_magic() ) { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.GpuStart(); } ); + auto vec = (Vector*)timeline; + auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.Start(); } ); if( it != vec->begin() ) --it; - if( zone.GpuEnd() >= 0 && it->GpuStart() > zone.GpuEnd() ) break; - if( it == &zone ) return ctx; + if( zone.End() >= 0 && it->Start() > zone.End() ) break; + if( it == &zone ) return td.second; if( it->Child() < 0 ) break; - timeline = &m_worker.GetGpuChildren( it->Child() ); + timeline = &m_worker.GetZoneChildren( it->Child() ); } else { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } ); + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->Start(); } ); if( it != timeline->begin() ) --it; - if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break; - if( *it == &zone ) return ctx; + if( zone.End() >= 0 && (*it)->Start() > zone.End() ) break; + if( *it == &zone ) return td.second; if( (*it)->Child() < 0 ) break; - timeline = &m_worker.GetGpuChildren( (*it)->Child() ); + timeline = &m_worker.GetZoneChildren( (*it)->Child() ); } } } @@ -561,6 +427,18 @@ const GpuCtxData* View::GetZoneCtx( const GpuEvent& zone ) const return nullptr; } +uint64_t View::GetZoneThread( const ZoneEvent& zone ) const +{ + auto td = GetZoneThreadData( zone ); + return td ? td->id : 0; +} + +const ZoneContext* View::GetZoneCtx( const ZoneEvent& zone ) const +{ + auto td = GetZoneThreadData(zone); + return td->ctx; +} + int64_t View::GetZoneChildTime( const ZoneEvent& zone ) { int64_t time = 0; @@ -588,33 +466,6 @@ int64_t View::GetZoneChildTime( const ZoneEvent& zone ) return time; } -int64_t View::GetZoneChildTime( const GpuEvent& zone ) -{ - int64_t time = 0; - if( zone.Child() >= 0 ) - { - auto& children = m_worker.GetGpuChildren( zone.Child() ); - if( children.is_magic() ) - { - auto& vec = *(Vector*)&children; - for( auto& v : vec ) - { - const auto childSpan = std::max( int64_t( 0 ), v.GpuEnd() - v.GpuStart() ); - time += childSpan; - } - } - else - { - for( auto& v : children ) - { - const auto childSpan = std::max( int64_t( 0 ), v->GpuEnd() - v->GpuStart() ); - time += childSpan; - } - } - } - return time; -} - int64_t View::GetZoneChildTimeFast( const ZoneEvent& zone ) { int64_t time = 0; @@ -695,20 +546,6 @@ int64_t View::GetZoneSelfTime( const ZoneEvent& zone ) return selftime; } -int64_t View::GetZoneSelfTime( const GpuEvent& zone ) -{ - if( m_cache.gpuSelfTime.first == &zone ) return m_cache.gpuSelfTime.second; - if( m_cache.gpuSelfTime2.first == &zone ) return m_cache.gpuSelfTime2.second; - const auto ztime = m_worker.GetZoneEnd( zone ) - zone.GpuStart(); - const auto selftime = ztime - GetZoneChildTime( zone ); - if( zone.GpuEnd() >= 0 ) - { - m_cache.gpuSelfTime2 = m_cache.gpuSelfTime; - m_cache.gpuSelfTime = std::make_pair( &zone, selftime ); - } - return selftime; -} - bool View::GetZoneRunningTime( const ContextSwitch* ctx, const ZoneEvent& ev, int64_t& time, uint64_t& cnt ) { auto it = std::lower_bound( ctx->v.begin(), ctx->v.end(), ev.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l.End() < (uint64_t)r; } ); @@ -933,4 +770,42 @@ void View::UpdateTitle() } } +void View::ContextCombo( const std::string* ctxName, uint8_t* ctxIdx ) +{ + auto& ctxs = m_worker.GetCtxData(); + if( ctxName == nullptr || ctxName->empty() ) + { + ctxName = &m_worker.GetCtxName( *ctxIdx ); + } + + float ctxComboWidth = 0.0f; + for( uint8_t i = 0; i < ctxs.size(); i++ ) + { + float width = ImGui::CalcTextSize( m_worker.GetCtxName( i ).c_str() ).x + ImGui::GetTextLineHeight() * 2; + if( width > ctxComboWidth ) + { + ctxComboWidth = width; + } + } + + ImGui::SetNextItemWidth( ctxComboWidth ); + if( ImGui::BeginCombo( "##zonestatsctx", ctxName->c_str() ) ) + { + for( uint8_t i = 0; i < ctxs.size(); i++ ) + { + const std::string* name = &m_worker.GetCtxName( i ); + if( ImGui::Selectable( name->c_str() ) ) + { + if( i != *ctxIdx ) + { + m_flameGraphInvariant.Reset(); + } + *ctxIdx = i; + ctxName = name; + } + } + ImGui::EndCombo(); + } +} + } diff --git a/profiler/src/profiler/TracyView_ZoneInfo.cpp b/profiler/src/profiler/TracyView_ZoneInfo.cpp index f1b6d3fbfb..b8b13ab1f9 100644 --- a/profiler/src/profiler/TracyView_ZoneInfo.cpp +++ b/profiler/src/profiler/TracyView_ZoneInfo.cpp @@ -22,12 +22,6 @@ inline uint32_t GetZoneCallstack( const ZoneEvent& ev, const Worker& return worker.GetZoneExtra( ev ).callstack.Val(); } -template<> -inline uint32_t GetZoneCallstack( const GpuEvent& ev, const Worker& worker ) -{ - return ev.callstack.Val(); -} - void View::CalcZoneTimeData( unordered_flat_map& data, int64_t& ztime, const ZoneEvent& zone ) { assert( zone.HasChildren() ); @@ -283,10 +277,6 @@ void View::DrawInfoWindow() { DrawZoneInfoWindow(); } - else if( m_gpuInfoWindow ) - { - DrawGpuInfoWindow(); - } } void View::DrawZoneInfoWindow() @@ -301,6 +291,8 @@ void View::DrawZoneInfoWindow() ImGui::Begin( "Zone info", &show, ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse ); if( !ImGui::GetCurrentWindowRead()->SkipItems ) { + auto thread = GetZoneThreadData( ev ); + assert( thread ); if( ImGui::Button( ICON_FA_MICROSCOPE " Zoom to zone" ) ) { ZoomToZone( ev ); @@ -315,10 +307,10 @@ void View::DrawZoneInfoWindow() } } #ifndef TRACY_NO_STATISTICS - if( m_worker.AreSourceLocationZonesReady() ) + if( thread->ctx->AreSourceLocationZonesReady() ) { const auto sl = ev.SrcLoc(); - const auto& slz = m_worker.GetZonesForSourceLocation( sl ); + const auto& slz = thread->ctx->GetZonesForSourceLocation( sl ); if( !slz.zones.empty() ) { ImGui::SameLine(); @@ -376,9 +368,7 @@ void View::DrawZoneInfoWindow() ImGui::Separator(); - auto threadData = GetZoneThreadData( ev ); - assert( threadData ); - const auto tid = threadData->id; + const auto tid = thread->id; if( m_worker.HasZoneExtra( ev ) && m_worker.GetZoneExtra( ev ).name.Active() ) { ImGui::PushFont( g_fonts.normal, FontBig ); @@ -464,9 +454,9 @@ void View::DrawZoneInfoWindow() TextFocused( "Wall clock time:", std::asctime( std::localtime( &ts) ) ); TextFocused( "Execution time:", TimeToString( ztime ) ); #ifndef TRACY_NO_STATISTICS - if( m_worker.AreSourceLocationZonesReady() ) + if( thread->ctx->AreSourceLocationZonesReady() ) { - auto& zoneData = m_worker.GetZonesForSourceLocation( ev.SrcLoc() ); + auto& zoneData = thread->ctx->GetZonesForSourceLocation( ev.SrcLoc() ); if( zoneData.total > 0 ) { ImGui::SameLine(); @@ -482,6 +472,48 @@ void View::DrawZoneInfoWindow() ImGui::SameLine(); TextDisabledUnformatted( buf ); } + + if( thread->ctx->type != ZoneContextType::CPU ) + { + assert( m_worker.HasZoneExtra( ev )); + auto& extra = m_worker.GetZoneExtra( ev ); + TextFocused( "CPU command setup time:", TimeToString( extra.otherEnd.Val() - extra.otherStart.Val() ) ); + if( !thread->ctx ) + { + TextFocused( "Delay to execution:", TimeToString( ev.Start() - extra.otherStart.Val() ) ); + } + else + { + int64_t begin; + if( thread->timeline.is_magic() ) + { + begin = ( (Vector*)&thread->timeline )->front().Start(); + } + else + { + begin = thread->timeline.front()->Start(); + } + const auto drift = GpuDrift( thread->ctx ); + TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.Start(), begin, drift ) - extra.otherStart.Val() ) ); + } + } + + if( m_worker.HasZoneExtra( ev ) && thread->ctx->notes.contains( m_worker.GetZoneExtra( ev ).query_id ) ) + { + auto& extra = m_worker.GetZoneExtra( ev ); + for( auto& p : thread->ctx->notes.at( extra.query_id ) ) + { + if( thread->ctx->noteNames.count( p.first ) ) + { + TextFocused( m_worker.GetString( thread->ctx->noteNames.at( p.first ) ), RealToString( p.second ) ); + } + else + { + TextFocused( RealToString( p.first ), RealToString( p.second ) ); + } + } + } + const auto ctx = m_worker.GetContextSwitchData( tid ); if( ctx ) { @@ -501,7 +533,7 @@ void View::DrawZoneInfoWindow() TextDisabledUnformatted( "(100%)" ); ImGui::Separator(); TextFocused( "Running state regions:", "1" ); - if( !threadData->isFiber ) TextFocused( "CPU:", RealToString( it->Cpu() ) ); + if( !thread->isFiber ) TextFocused( "CPU:", RealToString( it->Cpu() ) ); } } else if( cnt > 1 ) @@ -534,7 +566,7 @@ void View::DrawZoneInfoWindow() } TextFocused( "Running state regions:", RealToString( cnt ) ); - if( !threadData->isFiber ) + if( !thread->isFiber ) { int numCpus = 0; for( int i=0; i<256; i++ ) numCpus += cpus[i]; @@ -600,14 +632,14 @@ void View::DrawZoneInfoWindow() const int64_t adjust = m_ctxSwitchTimeRelativeToZone ? ev.Start() : 0; const auto wrsz = eit - bit; - const auto numColumns = threadData->isFiber ? 4 : 6; + const auto numColumns = thread->isFiber ? 4 : 6; if( ImGui::BeginTable( "##waitregions", numColumns, ImGuiTableFlags_Resizable | ImGuiTableFlags_ScrollY | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable, ImVec2( 0, ImGui::GetTextLineHeightWithSpacing() * std::min( 1+wrsz, 15 ) ) ) ) { ImGui::TableSetupScrollFreeze( 0, 1 ); ImGui::TableSetupColumn( "Begin" ); ImGui::TableSetupColumn( "End" ); ImGui::TableSetupColumn( "Time" ); - if( threadData->isFiber ) + if( thread->isFiber ) { ImGui::TableSetupColumn( "Thread" ); } @@ -650,7 +682,7 @@ void View::DrawZoneInfoWindow() ZoomToRange( cend, cwakeup ); } ImGui::TableNextColumn(); - if( threadData->isFiber ) + if( thread->isFiber ) { const auto ftid = m_worker.DecompressThread( bit[i].Thread() ); ImGui::TextUnformatted( m_worker.GetThreadName( ftid ) ); @@ -910,14 +942,14 @@ void View::DrawZoneInfoWindow() ImGui::Separator(); { - if( threadData->messages.empty() ) + if( thread->messages.empty() ) { TextDisabledUnformatted( "No messages" ); } else { - auto msgit = std::lower_bound( threadData->messages.begin(), threadData->messages.end(), ev.Start(), [] ( const auto& lhs, const auto& rhs ) { return lhs->time < rhs; } ); - auto msgend = std::lower_bound( msgit, threadData->messages.end(), end+1, [] ( const auto& lhs, const auto& rhs ) { return lhs->time < rhs; } ); + auto msgit = std::lower_bound( thread->messages.begin(), thread->messages.end(), ev.Start(), [] ( const auto& lhs, const auto& rhs ) { return lhs->time < rhs; } ); + auto msgend = std::lower_bound( msgit, thread->messages.end(), end+1, [] ( const auto& lhs, const auto& rhs ) { return lhs->time < rhs; } ); const auto dist = std::distance( msgit, msgend ); if( dist == 0 ) @@ -1010,7 +1042,7 @@ void View::DrawZoneInfoWindow() parent = GetZoneParent( *parent ); } int idx = 0; - DrawZoneTrace( &ev, zoneTrace, m_worker, m_zoneinfoBuzzAnim, *this, m_showUnknownFrames, [&idx, this] ( const ZoneEvent* v, int& fidx ) { + DrawZoneTrace( &ev, zoneTrace, m_worker, m_zoneinfoBuzzAnim, *this, m_showUnknownFrames, [&idx, this, thread] ( const ZoneEvent* v, int& fidx ) { ImGui::TextDisabled( "%i.", fidx++ ); ImGui::SameLine(); const auto& srcloc = m_worker.GetSourceLocation( v->SrcLoc() ); @@ -1055,7 +1087,7 @@ void View::DrawZoneInfoWindow() { ZoomToZone( *v ); } - ZoneTooltip( *v ); + ZoneTooltip( *v, *thread ); } } ); @@ -1069,11 +1101,11 @@ void View::DrawZoneInfoWindow() { if( children.is_magic() ) { - DrawZoneInfoChildren>( *(Vector*)( &children ), ztime ); + DrawZoneInfoChildren>( *(Vector*)( &children ), ztime, *thread ); } else { - DrawZoneInfoChildren>( children, ztime ); + DrawZoneInfoChildren>( children, ztime, *thread ); } ImGui::TreePop(); } @@ -1207,7 +1239,7 @@ void View::DrawZoneInfoWindow() } template -void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) +void View::DrawZoneInfoChildren( const V& children, int64_t ztime, const ThreadData& thread ) { Adapter a; const auto rztime = 1.0 / ztime; @@ -1287,7 +1319,7 @@ void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) { ZoomToZone( cev ); } - ZoneTooltip( cev ); + ZoneTooltip( cev, thread ); } ImGui::PopID(); } @@ -1356,7 +1388,7 @@ void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) { ZoomToZone( cev ); } - ZoneTooltip( cev ); + ZoneTooltip( cev, thread ); } ImGui::PopID(); ImGui::Unindent(); @@ -1422,7 +1454,7 @@ void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) { ZoomToZone( cev ); } - ZoneTooltip( cev ); + ZoneTooltip( cev, thread ); } ImGui::PopID(); ImGui::NextColumn(); @@ -1437,446 +1469,6 @@ void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) } } -void View::DrawGpuInfoWindow() -{ - auto& ev = *m_gpuInfoWindow; - const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); - - const auto scale = GetScale(); - ImGui::SetNextWindowSize( ImVec2( 500 * scale, 600 * scale), ImGuiCond_FirstUseEver ); - bool show = true; - ImGui::Begin( "Zone info", &show, ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse ); - if( !ImGui::GetCurrentWindowRead()->SkipItems ) - { - if( ImGui::Button( ICON_FA_MICROSCOPE " Zoom to zone" ) ) - { - ZoomToZone( ev ); - } - auto parent = GetZoneParent( ev ); - if( parent ) - { - ImGui::SameLine(); - if( ImGui::Button( ICON_FA_ARROW_UP " Go to parent" ) ) - { - ShowZoneInfo( *parent, m_gpuInfoWindowThread ); - } - } - if( ev.callstack.Val() != 0 ) - { - ImGui::SameLine(); - bool hilite = m_callstackInfoWindow == ev.callstack.Val(); - if( hilite ) - { - SetButtonHighlightColor(); - } - if( ImGui::Button( ICON_FA_ALIGN_JUSTIFY " Call stack" ) ) - { - m_callstackInfoWindow = ev.callstack.Val(); - } - if( hilite ) - { - ImGui::PopStyleColor( 3 ); - } - } - const auto fileName = m_worker.GetString( srcloc.file ); - if( SourceFileValid( fileName, m_worker.GetCaptureTime(), *this, m_worker ) ) - { - ImGui::SameLine(); - bool hilite = m_sourceViewFile == fileName; - if( hilite ) - { - SetButtonHighlightColor(); - } - if( ImGui::Button( ICON_FA_FILE_LINES " Source" ) ) - { - ViewSourceCheckKeyMod( fileName, srcloc.line, m_worker.GetString( srcloc.function ) ); - } - if( hilite ) - { - ImGui::PopStyleColor( 3 ); - } - } - if( !m_gpuInfoStack.empty() ) - { - ImGui::SameLine(); - if( ImGui::Button( ICON_FA_ARROW_LEFT " Go back" ) ) - { - m_gpuInfoWindow = m_gpuInfoStack.back_and_pop(); - } - } - - ImGui::Separator(); - - const auto tid = GetZoneThread( ev ); - ImGui::PushFont( g_fonts.normal, FontBig ); - TextFocusedClipboard( "Zone name:", m_worker.GetString( srcloc.name ), m_worker.GetString( srcloc.name ), 1, g_fonts.normal, FontNormal ); - ImGui::SameLine(); - ImGui::PopFont(); - TextFocusedClipboard( "Function:", m_worker.GetString( srcloc.function ), m_worker.GetString( srcloc.function ), 2 ); - TextFocusedClipboard( "Location:", LocationToString( m_worker.GetString( srcloc.file ), srcloc.line ), LocationToString( m_worker.GetString( srcloc.file ), srcloc.line ), 3 ); - SmallColorBox( GetThreadColor( tid, 0 ) ); - ImGui::SameLine(); - TextFocused( "Thread:", m_worker.GetThreadName( tid ) ); - ImGui::SameLine(); - ImGui::TextDisabled( "(%s)", RealToString( tid ) ); - if( m_worker.IsThreadFiber( tid ) ) - { - ImGui::SameLine(); - TextColoredUnformatted( ImVec4( 0.2f, 0.6f, 0.2f, 1.f ), "Fiber" ); - } - ImGui::Separator(); - ImGui::BeginChild( "##gpuinfo" ); - - const auto end = m_worker.GetZoneEnd( ev ); - const auto ztime = end - ev.GpuStart(); - const auto selftime = GetZoneSelfTime( ev ); - TextFocused( "Time from start of program:", TimeToStringExact( ev.GpuStart() ) ); - TextFocused( "GPU execution time:", TimeToString( ztime ) ); - TextFocused( "GPU self time:", TimeToString( selftime ) ); - if( ztime != 0 ) - { - char buf[64]; - PrintStringPercent( buf, 100.f * selftime / ztime ); - ImGui::SameLine(); - TextDisabledUnformatted( buf ); - } - TextFocused( "CPU command setup time:", TimeToString( ev.CpuEnd() - ev.CpuStart() ) ); - auto ctx = GetZoneCtx( ev ); - if( !ctx ) - { - TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) ); - } - else - { - const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); - assert( td != ctx->threadData.end() ); - int64_t begin; - if( td->second.timeline.is_magic() ) - { - begin = ((Vector*)&td->second.timeline)->front().GpuStart(); - } - else - { - begin = td->second.timeline.front()->GpuStart(); - } - const auto drift = GpuDrift( ctx ); - TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) ); - } - - if( ctx->notes.contains( ev.query_id ) ) - { - for( auto& p : ctx->notes.at( ev.query_id ) ) - { - if( ctx->noteNames.count( p.first ) ) - { - TextFocused( m_worker.GetString( ctx->noteNames.at( p.first ) ), RealToString( p.second ) ); - } - else - { - TextFocused( RealToString( p.first ), RealToString( p.second ) ); - } - } - } - - ImGui::Separator(); - - std::vector zoneTrace; - while( parent ) - { - zoneTrace.emplace_back( parent ); - parent = GetZoneParent( *parent ); - } - int idx = 0; - DrawZoneTrace( &ev, zoneTrace, m_worker, m_zoneinfoBuzzAnim, *this, m_showUnknownFrames, [&idx, this] ( const GpuEvent* v, int& fidx ) { - ImGui::TextDisabled( "%i.", fidx++ ); - ImGui::SameLine(); - const auto& srcloc = m_worker.GetSourceLocation( v->SrcLoc() ); - const auto txt = m_worker.GetZoneName( srcloc ); - ImGui::PushID( idx++ ); - auto sel = ImGui::Selectable( txt, false ); - auto hover = ImGui::IsItemHovered(); - const auto fileName = m_worker.GetString( srcloc.file ); - if( m_zoneinfoBuzzAnim.Match( v ) ) - { - const auto time = m_zoneinfoBuzzAnim.Time(); - const auto indentVal = sin( time * 60.f ) * 10.f * time; - ImGui::SameLine( 0, ImGui::GetStyle().ItemSpacing.x + indentVal ); - } - else - { - ImGui::SameLine(); - } - ImGui::TextDisabled( "(%s) %s", TimeToString( m_worker.GetZoneEnd( *v ) - v->GpuStart() ), LocationToString( fileName, srcloc.line ) ); - ImGui::PopID(); - if( ImGui::IsItemClicked( 1 ) ) - { - if( SourceFileValid( fileName, m_worker.GetCaptureTime(), *this, m_worker ) ) - { - ViewSourceCheckKeyMod( fileName, srcloc.line, m_worker.GetString( srcloc.function ) ); - } - else - { - m_zoneinfoBuzzAnim.Enable( v, 0.5f ); - } - } - if( sel ) - { - ShowZoneInfo( *v, m_gpuInfoWindowThread ); - } - if( hover ) - { - m_gpuHighlight = v; - if( IsMouseClicked( 2 ) ) - { - ZoomToZone( *v ); - } - ZoneTooltip( *v ); - } - } ); - - if( ev.Child() >= 0 ) - { - const auto& children = m_worker.GetGpuChildren( ev.Child() ); - bool expand = ImGui::TreeNode( "Child zones" ); - ImGui::SameLine(); - ImGui::TextDisabled( "(%s)", RealToString( children.size() ) ); - if( expand ) - { - if( children.is_magic() ) - { - DrawGpuInfoChildren>( *(Vector*)( &children ), ztime ); - } - else - { - DrawGpuInfoChildren>( children, ztime ); - } - ImGui::TreePop(); - } - } - - ImGui::EndChild(); - } - ImGui::End(); - - if( !show ) - { - m_gpuInfoWindow = nullptr; - m_gpuInfoStack.clear(); - } -} - -template -void View::DrawGpuInfoChildren( const V& children, int64_t ztime ) -{ - Adapter a; - const auto rztime = 1.0 / ztime; - const auto ty = ImGui::GetTextLineHeight(); - - ImGui::SameLine(); - SmallCheckbox( ICON_FA_LAYER_GROUP " Group children locations", &m_groupChildrenLocations ); - - if( m_groupChildrenLocations ) - { - struct ChildGroup - { - int16_t srcloc; - uint64_t t; - Vector v; - }; - uint64_t ctime = 0; - unordered_flat_map cmap; - cmap.reserve( 128 ); - for( size_t i=0; isecond.t += ct; - it->second.v.push_back( i ); - } - - auto msz = cmap.size(); - Vector cgvec; - cgvec.reserve_and_use( msz ); - size_t idx = 0; - for( auto& it : cmap ) - { - cgvec[idx++] = &it.second; - } - - pdqsort_branchless( cgvec.begin(), cgvec.end(), []( const auto& lhs, const auto& rhs ) { return lhs->t > rhs->t; } ); - - ImGui::Columns( 2 ); - ImGui::Indent( ImGui::GetTreeNodeToLabelSpacing() ); - TextColoredUnformatted( ImVec4( 1.0f, 1.0f, 0.4f, 1.0f ), "Self time" ); - ImGui::Unindent( ImGui::GetTreeNodeToLabelSpacing() ); - ImGui::NextColumn(); - char buf[128]; - PrintStringPercent( buf, TimeToString( ztime - ctime ), double( ztime - ctime ) / ztime * 100 ); - ImGui::ProgressBar( double( ztime - ctime ) * rztime, ImVec2( -1, ty ), buf ); - ImGui::NextColumn(); - for( size_t i=0; i( new uint64_t[cgr.v.size()] ); - auto cti = std::unique_ptr( new uint32_t[cgr.v.size()] ); - for( size_t i=0; i ctt[rhs]; } ); - - for( size_t i=0; i( new uint64_t[children.size()] ); - auto cti = std::unique_ptr( new uint32_t[children.size()] ); - uint64_t ctime = 0; - for( size_t i=0; i ctt[rhs]; } ); - - ImGui::Columns( 2 ); - TextColoredUnformatted( ImVec4( 1.0f, 1.0f, 0.4f, 1.0f ), "Self time" ); - ImGui::NextColumn(); - char buf[128]; - PrintStringPercent( buf, TimeToString( ztime - ctime ), double( ztime - ctime ) / ztime * 100 ); - ImGui::ProgressBar( double( ztime - ctime ) / ztime, ImVec2( -1, ty ), buf ); - ImGui::NextColumn(); - for( size_t i=0; iAreSourceLocationZonesReady() ) { - auto& zoneData = m_worker.GetZonesForSourceLocation( ev.SrcLoc() ); + auto& zoneData = thread.ctx->GetZonesForSourceLocation( ev.SrcLoc() ); if( zoneData.total > 0 ) { ImGui::SameLine(); @@ -1961,6 +1531,48 @@ void View::ZoneTooltip( const ZoneEvent& ev ) ImGui::SameLine(); TextDisabledUnformatted( buf ); } + + if( thread.ctx->type != ZoneContextType::CPU ) + { + assert( m_worker.HasZoneExtra( ev ) ); + auto& extra = m_worker.GetZoneExtra( ev ); + TextFocused( "CPU command setup time:", TimeToString( extra.otherEnd.Val() - extra.otherStart.Val() ) ); + if( !thread.ctx ) + { + TextFocused( "Delay to execution:", TimeToString( ev.Start() - extra.otherStart.Val() ) ); + } + else + { + int64_t begin; + if( thread.timeline.is_magic() ) + { + begin = ( (Vector*)&thread.timeline )->front().Start(); + } + else + { + begin = thread.timeline.front()->Start(); + } + const auto drift = GpuDrift( thread.ctx ); + TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.Start(), begin, drift ) - extra.otherStart.Val() ) ); + } + } + + if( m_worker.HasZoneExtra( ev ) && thread.ctx->notes.contains( m_worker.GetZoneExtra( ev ).query_id ) ) + { + auto& extra = m_worker.GetZoneExtra( ev ); + for( auto& p : thread.ctx->notes.at( extra.query_id ) ) + { + if( thread.ctx->noteNames.count( p.first ) ) + { + TextFocused( m_worker.GetString( thread.ctx->noteNames.at( p.first ) ), RealToString( p.second ) ); + } + else + { + TextFocused( RealToString( p.first ), RealToString( p.second ) ); + } + } + } + const auto ctx = m_worker.GetContextSwitchData( tid ); if( ctx ) { @@ -1987,80 +1599,4 @@ void View::ZoneTooltip( const ZoneEvent& ev ) ImGui::EndTooltip(); } -void View::ZoneTooltip( const GpuEvent& ev ) -{ - const auto tid = GetZoneThread( ev ); - const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); - const auto end = m_worker.GetZoneEnd( ev ); - const auto ztime = end - ev.GpuStart(); - const auto selftime = GetZoneSelfTime( ev ); - - ImGui::BeginTooltip(); - ImGui::TextUnformatted( m_worker.GetString( srcloc.name ) ); - ImGui::TextUnformatted( m_worker.GetString( srcloc.function ) ); - ImGui::Separator(); - SmallColorBox( GetSrcLocColor( srcloc, 0 ) ); - ImGui::SameLine(); - ImGui::TextUnformatted( LocationToString( m_worker.GetString( srcloc.file ), srcloc.line ) ); - SmallColorBox( GetThreadColor( tid, 0 ) ); - ImGui::SameLine(); - TextFocused( "Thread:", m_worker.GetThreadName( tid ) ); - ImGui::SameLine(); - ImGui::TextDisabled( "(%s)", RealToString( tid ) ); - if( m_worker.IsThreadFiber( tid ) ) - { - ImGui::SameLine(); - TextColoredUnformatted( ImVec4( 0.2f, 0.6f, 0.2f, 1.f ), "Fiber" ); - } - ImGui::Separator(); - TextFocused( "GPU execution time:", TimeToString( ztime ) ); - TextFocused( "GPU self time:", TimeToString( selftime ) ); - if( ztime != 0 ) - { - char buf[64]; - PrintStringPercent( buf, 100.f * selftime / ztime ); - ImGui::SameLine(); - TextDisabledUnformatted( buf ); - } - TextFocused( "CPU command setup time:", TimeToString( ev.CpuEnd() - ev.CpuStart() ) ); - auto ctx = GetZoneCtx( ev ); - if( !ctx ) - { - TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) ); - } - else - { - const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); - assert( td != ctx->threadData.end() ); - int64_t begin; - if( td->second.timeline.is_magic() ) - { - begin = ((Vector*)&td->second.timeline)->front().GpuStart(); - } - else - { - begin = td->second.timeline.front()->GpuStart(); - } - const auto drift = GpuDrift( ctx ); - TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) ); - } - - if( ctx->notes.contains( ev.query_id ) ) - { - for( auto& p : ctx->notes.at( ev.query_id ) ) - { - if( ctx->noteNames.count( p.first ) ) - { - TextFocused( m_worker.GetString( ctx->noteNames.at( p.first ) ), RealToString( p.second ) ); - } - else - { - TextFocused( RealToString( p.first ), RealToString( p.second ) ); - } - } - } - - ImGui::EndTooltip(); -} - } diff --git a/profiler/src/profiler/TracyView_ZoneTimeline.cpp b/profiler/src/profiler/TracyView_ZoneTimeline.cpp index 7cdac67440..57da207e8e 100644 --- a/profiler/src/profiler/TracyView_ZoneTimeline.cpp +++ b/profiler/src/profiler/TracyView_ZoneTimeline.cpp @@ -76,7 +76,7 @@ void View::DrawThread( const TimelineContext& ctx, const ThreadData& thread, con if( !draw.empty() && yPos <= yMax && yPos + ostep * croppedDepth >= yMin ) { // Only apply margin when croppingActive to avoid text moving around when mouse is getting close to the cropper widget - DrawZoneList( ctx, draw, offset, thread.id, croppedDepth, croppingActive ? cropperAdditionalMargin + GetScale() /* Ensure text has a bit of space for text */ : 0.f ); + DrawZoneList( ctx, draw, offset, thread, croppedDepth, croppingActive ? cropperAdditionalMargin + GetScale() /* Ensure text has a bit of space for text */ : 0.f ); } offset += ostep * croppedDepth; @@ -86,9 +86,9 @@ void View::DrawThread( const TimelineContext& ctx, const ThreadData& thread, con assert( ctxSwitch ); DrawContextSwitchList( ctx, ctxDraw, ctxSwitch->v, ctxOffset, offset, thread.isFiber ); } - if( hasSamples && !samplesDraw.empty() ) + if( thread.ctx->type == ZoneContextType::CPU && hasSamples && !samplesDraw.empty() ) { - DrawSampleList( ctx, samplesDraw, thread.samples, sampleOffset ); + DrawSampleList( ctx, samplesDraw, static_cast(thread).samples, sampleOffset ); } if( m_vd.drawLocks ) @@ -209,11 +209,6 @@ void View::DrawThreadOverlays( const ThreadData& thread, const ImVec2& ul, const draw->AddRectFilled( ul, dr, 0x228888DD ); draw->AddRect( ul, dr, 0x448888DD ); } - if( m_gpuInfoWindow && m_gpuInfoWindowThread == thread.id ) - { - draw->AddRectFilled( ul, dr, 0x2288DD88 ); - draw->AddRect( ul, dr, 0x4488DD88 ); - } if( m_cpuDataThread == thread.id ) { draw->AddRectFilled( ul, dr, 0x2DFF8888 ); @@ -227,7 +222,7 @@ void View::DrawThreadOverlays( const ThreadData& thread, const ImVec2& ul, const } -void View::DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int _offset, uint64_t tid, int maxDepth, double margin ) +void View::DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int _offset, const ThreadData& thread, int maxDepth, double margin ) { auto draw = ImGui::GetWindowDrawList(); const auto w = ctx.w; @@ -286,7 +281,7 @@ void View::DrawZoneList( const TimelineContext& ctx, const std::vector 0 ) { @@ -351,7 +346,7 @@ void View::DrawZoneList( const TimelineContext& ctx, const std::vector queue; Profiler profiler; std::atomic lockCounter { 0 }; - std::atomic gpuCtxCounter { 0 }; + std::atomic gpuCtxCounter { 1 }; // 0 reserved for default context std::atomic threadNameData { nullptr }; }; @@ -1425,7 +1425,7 @@ thread_local bool RpThreadInitDone = false; thread_local bool RpThreadShutdown = false; moodycamel::ConcurrentQueue init_order(103) s_queue( QueuePrealloc ); std::atomic init_order(104) s_lockCounter( 0 ); -std::atomic init_order(104) s_gpuCtxCounter( 0 ); +std::atomic init_order(104) s_gpuCtxCounter( 1 ); // 0 reserved for default context thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr }; diff --git a/public/client/TracyRocprof.cpp b/public/client/TracyRocprof.cpp index 370e42ec39..eb8e01df01 100644 --- a/public/client/TracyRocprof.cpp +++ b/public/client/TracyRocprof.cpp @@ -104,7 +104,7 @@ uint8_t gpu_context_allocate( ToolData* data ) tracy::MemWrite( &item->gpuNewContext.period, timestamp_period ); tracy::MemWrite( &item->gpuNewContext.context, context_id ); tracy::MemWrite( &item->gpuNewContext.flags, context_flags ); - tracy::MemWrite( &item->gpuNewContext.type, tracy::GpuContextType::Rocprof ); + tracy::MemWrite( &item->gpuNewContext.type, tracy::ZoneContextType::Rocprof ); tracy::Profiler::QueueSerialFinish(); } @@ -451,6 +451,7 @@ void calibration_thread( void* ptr ) { while( !TracyIsStarted ) ; + SetThreadName( "rocprofiler calibration" ); ToolData* data = static_cast( ptr ); data->context_id = gpu_context_allocate( data ); const char* user_counters = GetEnvVar( "TRACY_ROCPROF_COUNTERS" ); diff --git a/public/common/TracyQueue.hpp b/public/common/TracyQueue.hpp index 765c83c7d5..a1c4398308 100644 --- a/public/common/TracyQueue.hpp +++ b/public/common/TracyQueue.hpp @@ -398,7 +398,7 @@ struct QueueMessageColorFatThread : public QueueMessageColorFat }; // Don't change order, only add new entries at the end, this is also used on trace dumps! -enum class GpuContextType : uint8_t +enum class ZoneContextType : uint8_t { Invalid, OpenGl, @@ -409,7 +409,8 @@ enum class GpuContextType : uint8_t Metal, Custom, CUDA, - Rocprof + Rocprof, + CPU }; enum GpuContextFlags : uint8_t @@ -425,7 +426,7 @@ struct QueueGpuNewContext float period; uint8_t context; GpuContextFlags flags; - GpuContextType type; + ZoneContextType type; }; struct QueueGpuZoneBeginLean diff --git a/public/common/TracyVersion.hpp b/public/common/TracyVersion.hpp index 7d704c500d..1b6fc48c09 100644 --- a/public/common/TracyVersion.hpp +++ b/public/common/TracyVersion.hpp @@ -6,8 +6,8 @@ namespace tracy namespace Version { enum { Major = 0 }; -enum { Minor = 12 }; -enum { Patch = 4 }; +enum { Minor = 13 }; +enum { Patch = 0 }; } } diff --git a/server/TracyContext.cpp b/server/TracyContext.cpp new file mode 100644 index 0000000000..cb62f0940b --- /dev/null +++ b/server/TracyContext.cpp @@ -0,0 +1,65 @@ +#include "TracyContext.hpp" + +namespace tracy +{ + +const ThreadData* ZoneContext::GetThreadData( uint64_t tid ) const +{ + auto it = threadData.find( tid ); + if( it == threadData.end() ) return nullptr; + return it->second; +} + +#ifndef TRACY_NO_STATISTICS +ZoneContext::SourceLocationZones& ZoneContext::GetZonesForSourceLocation( int16_t srcloc ) +{ + assert( AreSourceLocationZonesReady() ); + static SourceLocationZones empty; + auto it = sourceLocationZones.find( srcloc ); + return it != sourceLocationZones.end() ? it->second : empty; +} + +const ZoneContext::SourceLocationZones& ZoneContext::GetZonesForSourceLocation( int16_t srcloc ) const +{ + assert( AreSourceLocationZonesReady() ); + static const SourceLocationZones empty; + auto it = sourceLocationZones.find( srcloc ); + return it != sourceLocationZones.end() ? it->second : empty; +} + +ZoneContext::SourceLocationZones* ZoneContext::GetSourceLocationZonesReal( uint16_t srcloc ) +{ + auto it = sourceLocationZones.find( srcloc ); + assert( it != sourceLocationZones.end() ); + srclocZonesLast.first = srcloc; + srclocZonesLast.second = &it->second; + return &it->second; +} + +void ZoneContext::InitSourceLocationZones( uint16_t srcloc ) +{ + auto res = sourceLocationZones.emplace( srcloc, SourceLocationZones() ); + srclocZonesLast.first = srcloc; + srclocZonesLast.second = &res.first->second; +} + +#else +uint64_t* ZoneContext::GetSourceLocationZonesCntReal( uint16_t srcloc ) +{ + auto it = sourceLocationZonesCnt.find( srcloc ); + assert( it != sourceLocationZonesCnt.end() ); + srclocCntLast.first = srcloc; + srclocCntLast.second = &it->second; + return &it->second; +} + +void InitSourceLocationZonesCnt( uint16_t srcloc ) +{ + auto res = sourceLocationZonesCnt.emplace( srcloc, 0 ); + srclocCntLast.first = srcloc; + srclocCntLast.second = &res.first->second; +} + +#endif + +} // namespace tracy diff --git a/server/TracyContext.hpp b/server/TracyContext.hpp new file mode 100644 index 0000000000..969fb68aaf --- /dev/null +++ b/server/TracyContext.hpp @@ -0,0 +1,192 @@ +#ifndef __TRACYCONTEXT_HPP__ +#define __TRACYCONTEXT_HPP__ + +#include "TracyEvent.hpp" +#include "TracyShortPtr.hpp" +#include "tracy_robin_hood.h" + +namespace tracy +{ + +constexpr const char* ZoneContextNames[] = { + "Invalid", + "OpenGL", + "Vulkan", + "OpenCL", + "Direct3D 12", + "Direct3D 11", + "Metal", + "Custom", + "CUDA", + "Rocprof", + "CPU" +}; + +struct ZoneContext; +class Worker; + +struct ThreadData +{ + uint64_t id; + uint64_t count; + Vector> timeline; + Vector> stack; + Vector> messages; + uint32_t nextZoneId; + Vector zoneIdStack; + uint8_t isFiber; + ThreadData* fiber; + uint8_t* stackCount; + int32_t groupHint; + ZoneContext* ctx; +#ifndef TRACY_NO_STATISTICS + Vector childTimeStack; +#endif + + tracy_force_inline void IncStackCount( int16_t srcloc ) { stackCount[uint16_t( srcloc )]++; } + tracy_force_inline bool DecStackCount( int16_t srcloc ) { return --stackCount[uint16_t( srcloc )] != 0; } +}; + +struct ZoneContext +{ + struct ZoneThreadData + { + tracy_force_inline ZoneEvent* Zone() const { return (ZoneEvent*)( _zone_thread >> 16 ); } + tracy_force_inline void SetZone( ZoneEvent* zone ) + { + auto z64 = (uint64_t)zone; + assert( ( z64 & 0xFFFF000000000000 ) == 0 ); + memcpy( ( (char*)&_zone_thread ) + 2, &z64, 4 ); + memcpy( ( (char*)&_zone_thread ) + 6, ( (char*)&z64 ) + 4, 2 ); + } + tracy_force_inline uint16_t Thread() const { return uint16_t( _zone_thread & 0xFFFF ); } + tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_zone_thread, &thread, 2 ); } + + uint64_t _zone_thread; + }; + enum + { + ZoneThreadDataSize = sizeof( ZoneThreadData ) + }; + +private: + struct SourceLocationZones + { + struct ZtdSort + { + bool operator()( const ZoneThreadData& lhs, const ZoneThreadData& rhs ) const { return lhs.Zone()->Start() < rhs.Zone()->Start(); } + }; + + SortedVector zones; + int64_t min = std::numeric_limits::max(); + int64_t max = std::numeric_limits::min(); + int64_t total = 0; + double sumSq = 0; + int64_t selfMin = std::numeric_limits::max(); + int64_t selfMax = std::numeric_limits::min(); + int64_t selfTotal = 0; + size_t nonReentrantCount = 0; + int64_t nonReentrantMin = std::numeric_limits::max(); + int64_t nonReentrantMax = std::numeric_limits::min(); + int64_t nonReentrantTotal = 0; + unordered_flat_map threadCnt; + }; + +#ifndef TRACY_NO_STATISTICS + unordered_flat_map sourceLocationZones; + bool sourceLocationZonesReady = false; +#else + unordered_flat_map sourceLocationZonesCnt; +#endif + +#ifndef TRACY_NO_STATISTICS + std::pair srclocZonesLast = std::make_pair( 0, nullptr ); +#else + std::pair srclocCntLast = std::make_pair( 0, nullptr ); +#endif + +public: + StringIdx name; + std::string longName; + ZoneContextType type; + unordered_flat_map threadData; + Vector threads; + uint64_t count; + unordered_flat_map noteNames; + unordered_flat_map> notes; + + uint64_t threadCtx = 0; + ThreadData* threadCtxData = nullptr; + int64_t refTimeThread = 0; + + const ThreadData* GetThreadData( uint64_t tid ) const; + + SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ); + const SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ) const; + const unordered_flat_map& GetSourceLocationZones() const { return sourceLocationZones; } + bool AreSourceLocationZonesReady() const { return sourceLocationZonesReady; } + void SetSourceLocationZonesReady() { sourceLocationZonesReady = true; } + +#ifndef TRACY_NO_STATISTICS + SourceLocationZones* GetSourceLocationZones( uint16_t srcloc ) + { + if( srclocZonesLast.first == srcloc ) return srclocZonesLast.second; + return GetSourceLocationZonesReal( srcloc ); + } + SourceLocationZones* GetSourceLocationZonesReal( uint16_t srcloc ); + void InitSourceLocationZones( uint16_t srcloc ); +#else + uint64_t* GetSourceLocationZonesCnt( uint16_t srcloc ) + { + if( srclocCntLast.first == srcloc ) return srclocCntLast.second; + return GetSourceLocationZonesCntReal( srcloc ); + } + uint64_t* GetSourceLocationZonesCntReal( uint16_t srcloc ); + void InitSourceLocationZonesCnt( uint16_t srcloc ); +#endif + + friend class Worker; +}; + +struct CPUZoneContext : public ZoneContext +{ + CPUZoneContext() { type = ZoneContextType::CPU; } +}; + +struct CPUThreadData : public ThreadData +{ +#ifndef TRACY_NO_STATISTICS + Vector ghostZones; + uint64_t ghostIdx; + SortedVector postponedSamples; +#endif + Vector samples; + SampleData pendingSample; + Vector ctxSwitchSamples; + uint64_t kernelSampleCnt; +}; + +struct GpuCtxData : public ZoneContext +{ + int64_t timeDiff; + uint64_t thread; + float period; + bool hasPeriod; + bool hasCalibration; + int64_t calibratedGpuTime; + int64_t calibratedCpuTime; + double calibrationMod; + int64_t lastGpuTime; + uint64_t overflow; + uint32_t overflowMul; + short_ptr query[64 * 1024]; +}; + +enum +{ + GpuCtxDataSize = sizeof( GpuCtxData ) +}; + +} // namespace tracy + +#endif /* TRACYCONTEXT_H */ diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index b2276deece..09a82e230e 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -232,6 +232,9 @@ struct ZoneExtra StringIdx text; StringIdx name; Int24 color; + Int48 otherStart; + Int48 otherEnd; + uint16_t query_id; // TODO: delete me? }; enum { ZoneExtraSize = sizeof( ZoneExtra ) }; @@ -389,36 +392,6 @@ struct LockHighlight bool blocked; }; - -struct GpuEvent -{ - tracy_force_inline int64_t CpuStart() const { return int64_t( _cpuStart_srcloc ) >> 16; } - tracy_force_inline void SetCpuStart( int64_t cpuStart ) { assert( cpuStart < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuStart_srcloc)+2, &cpuStart, 4 ); memcpy( ((char*)&_cpuStart_srcloc)+6, ((char*)&cpuStart)+4, 2 ); } - tracy_force_inline int64_t CpuEnd() const { return int64_t( _cpuEnd_thread ) >> 16; } - tracy_force_inline void SetCpuEnd( int64_t cpuEnd ) { assert( cpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuEnd_thread)+2, &cpuEnd, 4 ); memcpy( ((char*)&_cpuEnd_thread)+6, ((char*)&cpuEnd)+4, 2 ); } - tracy_force_inline int64_t GpuStart() const { return int64_t( _gpuStart_child1 ) >> 16; } - tracy_force_inline void SetGpuStart( int64_t gpuStart ) { /*assert( gpuStart < (int64_t)( 1ull << 47 ) );*/ memcpy( ((char*)&_gpuStart_child1)+2, &gpuStart, 4 ); memcpy( ((char*)&_gpuStart_child1)+6, ((char*)&gpuStart)+4, 2 ); } - tracy_force_inline int64_t GpuEnd() const { return int64_t( _gpuEnd_child2 ) >> 16; } - tracy_force_inline void SetGpuEnd( int64_t gpuEnd ) { assert( gpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_gpuEnd_child2)+2, &gpuEnd, 4 ); memcpy( ((char*)&_gpuEnd_child2)+6, ((char*)&gpuEnd)+4, 2 ); } - tracy_force_inline int16_t SrcLoc() const { return int16_t( _cpuStart_srcloc & 0xFFFF ); } - tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_cpuStart_srcloc, &srcloc, 2 ); } - tracy_force_inline uint16_t Thread() const { return uint16_t( _cpuEnd_thread & 0xFFFF ); } - tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_cpuEnd_thread, &thread, 2 ); } - tracy_force_inline int32_t Child() const { return int32_t( uint32_t( _gpuStart_child1 & 0xFFFF ) | ( uint32_t( _gpuEnd_child2 & 0xFFFF ) << 16 ) ); } - tracy_force_inline void SetChild( int32_t child ) { memcpy( &_gpuStart_child1, &child, 2 ); memcpy( &_gpuEnd_child2, ((char*)&child)+2, 2 ); } - - uint64_t _cpuStart_srcloc; - uint64_t _cpuEnd_thread; - uint64_t _gpuStart_child1; - uint64_t _gpuEnd_child2; - Int24 callstack; - uint16_t query_id; -}; - -enum { GpuEventSize = sizeof( GpuEvent ) }; -static_assert( std::is_standard_layout::value, "GpuEvent is not standard layout" ); - - struct MemEvent { tracy_force_inline uint64_t Ptr() const { return uint64_t( int64_t( _ptr_csalloc1 ) >> 8 ); } @@ -723,66 +696,6 @@ enum { ChildSampleSize = sizeof( ChildSample ) }; #pragma pack( pop ) - -struct ThreadData -{ - uint64_t id; - uint64_t count; - Vector> timeline; - Vector> stack; - Vector> messages; - uint32_t nextZoneId; - Vector zoneIdStack; -#ifndef TRACY_NO_STATISTICS - Vector childTimeStack; - Vector ghostZones; - uint64_t ghostIdx; - SortedVector postponedSamples; -#endif - Vector samples; - SampleData pendingSample; - Vector ctxSwitchSamples; - uint64_t kernelSampleCnt; - uint8_t isFiber; - ThreadData* fiber; - uint8_t* stackCount; - int32_t groupHint; - - tracy_force_inline void IncStackCount( int16_t srcloc ) { stackCount[uint16_t(srcloc)]++; } - tracy_force_inline bool DecStackCount( int16_t srcloc ) { return --stackCount[uint16_t(srcloc)] != 0; } -}; - -struct GpuCtxThreadData -{ - Vector> timeline; - Vector> stack; -}; - -struct GpuCtxData -{ - int64_t timeDiff; - uint64_t thread; - uint64_t count; - float period; - GpuContextType type; - bool hasPeriod; - bool hasCalibration; - int64_t calibratedGpuTime; - int64_t calibratedCpuTime; - double calibrationMod; - int64_t lastGpuTime; - uint64_t overflow; - uint32_t overflowMul; - StringIdx name; - unordered_flat_map threadData; - unordered_flat_map noteNames; - unordered_flat_map> notes; - short_ptr query[64*1024]; -}; - -enum { GpuCtxDataSize = sizeof( GpuCtxData ) }; - - enum class PlotType : uint8_t { User, diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 483e3233d0..e085bdd416 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -56,7 +56,7 @@ static bool SourceFileValid( const char* fn, uint64_t olderThan ) static const uint8_t FileHeader[8] { 't', 'r', 'a', 'c', 'y', Version::Major, Version::Minor, Version::Patch }; enum { FileHeaderMagic = 5 }; static const int CurrentVersion = FileVersion( Version::Major, Version::Minor, Version::Patch ); -static const int MinSupportedVersion = FileVersion( 0, 9, 0 ); +static const int MinSupportedVersion = FileVersion( 0, 13, 0 ); static void UpdateLockCountLockable( LockMap& lockmap, size_t pos ) @@ -284,11 +284,11 @@ Worker::Worker( const char* addr, uint16_t port, int64_t memoryLimit ) m_data.memory = m_slab.AllocInit(); m_data.memNameMap.emplace( 0, m_data.memory ); - memset( (char*)m_gpuCtxMap, 0, sizeof( m_gpuCtxMap ) ); + memset( (char*)m_ctxMap, 0, sizeof( m_ctxMap ) ); + ProcessCpuNewContext(); #ifndef TRACY_NO_STATISTICS - m_data.sourceLocationZonesReady = true; - m_data.gpuSourceLocationZonesReady = true; + GetDefaultCtx().SetSourceLocationZonesReady(); m_data.callstackSamplesReady = true; m_data.ghostZonesReady = true; m_data.ctxUsageReady = true; @@ -340,6 +340,8 @@ Worker::Worker( const char* name, const char* program, const std::vectorsecond; - + GetDefaultCtx().InitSourceLocationZones(key); #else - auto res = m_data.sourceLocationZonesCnt.emplace( key, 0 ); - m_data.srclocCntLast.first = key; - m_data.srclocCntLast.second = &res.first->second; + GetDefaultCtx().InitSourceLocationZonesCnt(key); #endif } else @@ -388,10 +385,10 @@ Worker::Worker( const char* name, const char* program, const std::vectorSetEnd( v.timestamp ); #ifndef TRACY_NO_STATISTICS - ZoneThreadData ztd; + ZoneContext::ZoneThreadData ztd; ztd.SetZone( zone ); ztd.SetThread( CompressThread( v.tid ) ); - auto slz = GetSourceLocationZones( zone->SrcLoc() ); + auto slz = GetDefaultCtx().GetSourceLocationZones( zone->SrcLoc() ); slz->zones.push_back( ztd ); #else CountZoneStatistics( zone ); @@ -450,10 +447,10 @@ Worker::Worker( const char* name, const char* program, const std::vectorcolor = 0xFFFFFFFF; msg->callstack.SetVal( 0 ); - if( m_threadCtx != v.tid ) + if( GetDefaultCtx().threadCtx != v.tid ) { - m_threadCtx = v.tid; - m_threadCtxData = nullptr; + GetDefaultCtx().threadCtx = v.tid; + GetDefaultCtx().threadCtxData = nullptr; } InsertMessageData( msg ); } @@ -498,7 +495,8 @@ Worker::Worker( const char* name, const char* program, const std::vector() ); + } + else if( type != ZoneContextType::Invalid ) + { + m_data.contexts.push_back( m_slab.AllocInit() ); + } + else + { + throw LoadFailure( "Invalid Context Type" ); + } + m_ctxMap[i] = m_data.contexts.back(); + } + f.Read( m_defaultCtx ); + f.Read( sz ); m_data.cpuTopology.reserve( sz ); for( uint64_t i=0; isecond.zones.reserve( cnt ); - } - - f.Read( sz ); - for( uint64_t i=0; isecond.zones.reserve( cnt ); + // TODO: This probably reserves an unecessary amount + ctx->sourceLocationZones.reserve( sle + sz ); + uint64_t slz_sz; + f.Read( slz_sz ); + for( uint64_t i = 0; i < slz_sz; i++ ) + { + int16_t id; + uint64_t cnt; + f.Read2( id, cnt ); + auto status = ctx->sourceLocationZones.emplace( id, ZoneContext::SourceLocationZones() ); + assert( status.second ); + status.first->second.zones.reserve( cnt ); + } } #else - f.Read( sz ); - for( uint64_t i=0; isourceLocationZonesCnt.emplace( id, 0 ); + } } #endif @@ -1003,10 +1009,12 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow memset( (char*)m_data.zoneChildren.data(), 0, sizeof( Vector> ) * sz ); int32_t childIdx = 0; f.Read( sz ); - m_data.threads.reserve_exact( sz, m_slab ); + GetDefaultCtx().threadData.reserve(sz); + GetDefaultCtx().threads.reserve_exact( sz, m_slab ); for( uint64_t i=0; i(); + auto td = m_slab.AllocInit(); + td->ctx = &GetDefaultCtx(); uint64_t tid; if( fileVer >= FileVersion( 0, 11, 1 ) ) { @@ -1023,7 +1031,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow f.Read( tsz ); if( tsz != 0 ) { - ReadTimeline( f, td->timeline, tsz, 0, childIdx ); + ReadTimeline( f, td->timeline, td->ctx, tsz, 0, childIdx ); } uint64_t msz; f.Read( msz ); @@ -1086,23 +1094,19 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow f.Skip( ssz * ( 8 + 3 ) ); } } - m_data.threads[i] = td; - m_threadMap.emplace( tid, td ); + GetDefaultCtx().threads[i] = td; + GetDefaultCtx().threadData.emplace( tid, td ); } s_loadProgress.progress.store( LoadProgress::GpuZones, std::memory_order_relaxed ); f.Read( sz ); s_loadProgress.subTotal.store( sz, std::memory_order_relaxed ); s_loadProgress.subProgress.store( 0, std::memory_order_relaxed ); - f.Read( sz ); - m_data.gpuChildren.reserve_exact( sz, m_slab ); - memset( (char*)m_data.gpuChildren.data(), 0, sizeof( Vector> ) * sz ); - childIdx = 0; - f.Read( sz ); - m_data.gpuData.reserve_exact( sz, m_slab ); - for( uint64_t i=0; i(); + if( m_data.contexts[i]->type == ZoneContextType::CPU ) continue; + auto ctx = static_cast( m_data.contexts[i] ); + uint8_t calibration; f.Read7( ctx->thread, calibration, ctx->count, ctx->period, ctx->type, ctx->name, ctx->overflow ); uint64_t notesz; @@ -1122,17 +1126,23 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow m_data.gpuCnt += ctx->count; uint64_t tdsz; f.Read( tdsz ); + ctx->threads.reserve_exact( tdsz, m_slab ); for( uint64_t j=0; jthreadData.emplace( tid, m_slab.AllocInit() ).first->second; + td->ctx = ctx; + td->id = tid; + td->isFiber = 0; if( tsz != 0 ) { int64_t refTime = 0; int64_t refGpuTime = 0; - auto td = ctx->threadData.emplace( tid, GpuCtxThreadData {} ).first; - ReadTimeline( f, td->second.timeline, tsz, refTime, refGpuTime, childIdx, fileVer >= FileVersion( 0, 12, 4 ) ); + ReadTimeline( f, td->timeline, ctx, tsz, refTime, childIdx ); } + ctx->threads[j] = td; } if( fileVer >= FileVersion( 0, 12, 4 ) ) @@ -1157,7 +1167,8 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow } } - m_data.gpuData[i] = ctx; + m_data.contexts[i] = ctx; + m_ctxMap[i] = ctx; } s_loadProgress.progress.store( LoadProgress::Plots, std::memory_order_relaxed ); @@ -1716,69 +1727,43 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow if( mem.second->reconstruct ) jobs.emplace_back( std::thread( [this, mem = mem.second] { ReconstructMemAllocPlot( *mem ); } ) ); } - std::function>&, uint16_t)> ProcessTimeline; - ProcessTimeline = [this, &ProcessTimeline] ( uint8_t* countMap, Vector>& _vec, uint16_t thread ) + std::function>&, uint16_t, ZoneContext*)> ProcessTimeline; + ProcessTimeline = [this, &ProcessTimeline] ( uint8_t* countMap, Vector>& _vec, uint16_t thread, ZoneContext* ctx ) { if( m_shutdown.load( std::memory_order_relaxed ) ) return; assert( _vec.is_magic() ); auto& vec = *(Vector*)( &_vec ); for( auto& zone : vec ) { - if( zone.IsEndValid() ) ReconstructZoneStatistics( countMap, zone, thread ); + if( zone.IsEndValid() ) ReconstructZoneStatistics( countMap, zone, *ctx, thread ); if( zone.HasChildren() ) { countMap[uint16_t(zone.SrcLoc())]++; - ProcessTimeline( countMap, GetZoneChildrenMutable( zone.Child() ), thread ); + ProcessTimeline( countMap, GetZoneChildren( zone.Child() ), thread, ctx ); countMap[uint16_t(zone.SrcLoc())]--; } } }; jobs.emplace_back( std::thread( [this, ProcessTimeline] { - for( auto& t : m_data.threads ) + for( auto ctx : m_data.contexts ) { - if( m_shutdown.load( std::memory_order_relaxed ) ) return; - if( !t->timeline.empty() ) - { - uint8_t countMap[64*1024]; - // Don't touch thread compression cache in a thread. - ProcessTimeline( countMap, t->timeline, m_data.localThreadCompress.DecompressMustRaw( t->id ) ); - } - } - std::lock_guard lock( m_data.lock ); - m_data.sourceLocationZonesReady = true; - } ) ); - - std::function>&, uint16_t)> ProcessTimelineGpu; - ProcessTimelineGpu = [this, &ProcessTimelineGpu] ( Vector>& _vec, uint16_t thread ) - { - if( m_shutdown.load( std::memory_order_relaxed ) ) return; - assert( _vec.is_magic() ); - auto& vec = *(Vector*)( &_vec ); - for( auto& zone : vec ) - { - if( zone.GpuEnd() >= 0 ) ReconstructZoneStatistics( zone, thread ); - if( zone.Child() >= 0 ) - { - ProcessTimelineGpu( GetGpuChildrenMutable( zone.Child() ), thread ); - } - } - }; - - jobs.emplace_back( std::thread( [this, ProcessTimelineGpu] { - for( auto& t : m_data.gpuData ) - { - for( auto& td : t->threadData ) + for( auto p : ctx->threadData ) { + auto t = p.second; if( m_shutdown.load( std::memory_order_relaxed ) ) return; - if( !td.second.timeline.empty() ) + if( !t->timeline.empty() ) { - ProcessTimelineGpu( td.second.timeline, td.first ); + uint8_t countMap[64 * 1024]; + // Don't touch thread compression cache in a thread. + ProcessTimeline( countMap, t->timeline, m_data.localThreadCompress.DecompressMustRaw( t->id ), ctx ); } } + { + std::lock_guard lock( m_data.lock ); + ctx->SetSourceLocationZonesReady(); + } } - std::lock_guard lock( m_data.lock ); - m_data.gpuSourceLocationZonesReady = true; } ) ); if( eventMask & EventType::Samples ) @@ -1786,11 +1771,12 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow jobs.emplace_back( std::thread( [this] { unordered_flat_map counts; uint32_t total = 0; - for( auto& t : m_data.threads ) total += t->samples.size(); + for( auto& t : GetDefaultCtx().threads ) total += static_cast(t)->samples.size(); if( total != 0 ) { - for( auto& t : m_data.threads ) + for( auto& td : GetDefaultCtx().threads ) { + auto t = static_cast(td); if( m_shutdown.load( std::memory_order_relaxed ) ) return; auto cit = t->ctxSwitchSamples.begin(); for( auto& sd : t->samples ) @@ -1850,8 +1836,9 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow jobs.emplace_back( std::thread( [this] { uint32_t gcnt = 0; - for( auto& t : m_data.threads ) + for( auto& td : GetDefaultCtx().threads ) { + auto t = static_cast(td); if( m_shutdown.load( std::memory_order_relaxed ) ) return; if( !t->samples.empty() ) { @@ -1878,8 +1865,9 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow } ) ); jobs.emplace_back( std::thread( [this] { - for( auto& t : m_data.threads ) + for( auto& td : GetDefaultCtx().threads ) { + auto t = static_cast(td); uint16_t tid = CompressThread( t->id ); for( auto& v : t->samples ) { @@ -1954,24 +1942,24 @@ Worker::~Worker() delete[] m_frameImageBuffer; delete[] m_tmpBuf; - for( auto& v : m_data.threads ) - { - v->timeline.~Vector(); - v->stack.~Vector(); - v->messages.~Vector(); - v->zoneIdStack.~Vector(); - v->samples.~Vector(); -#ifndef TRACY_NO_STATISTICS - v->childTimeStack.~Vector(); - v->ghostZones.~Vector(); -#endif - } - for( auto& v : m_data.gpuData ) + for( auto& v : m_data.contexts ) { for( auto& vt : v->threadData ) { - vt.second.timeline.~Vector(); - vt.second.stack.~Vector(); + vt.second->timeline.~Vector(); + vt.second->stack.~Vector(); +#ifndef TRACY_NO_STATISTICS + vt.second->childTimeStack.~Vector(); +#endif + if (v->type == ZoneContextType::CPU) { + auto ct = static_cast(vt.second); + ct->messages.~Vector(); + ct->zoneIdStack.~Vector(); + ct->samples.~Vector(); +#ifndef TRACY_NO_STATISTICS + ct->ghostZones.~Vector(); +#endif + } } } for( auto& v : m_data.plots.Data() ) @@ -1986,9 +1974,9 @@ Worker::~Worker() { v.second->~LockMap(); } - for( auto& v : m_data.zoneChildren ) + for( auto& zc : m_data.zoneChildren ) { - v.~Vector(); + zc.~Vector(); } for( auto& v : m_data.memNameMap ) { @@ -1998,10 +1986,6 @@ Worker::~Worker() { v.second->v.~Vector(); } - for( auto& v : m_data.gpuChildren ) - { - v.~Vector(); - } #ifndef TRACY_NO_STATISTICS for( auto& v : m_data.ghostChildren ) { @@ -2080,9 +2064,10 @@ uint64_t Worker::GetChildSamplesCountFull() const uint64_t Worker::GetContextSwitchSampleCount() const { uint64_t cnt = 0; - for( auto& v : m_data.threads ) + for( auto& v : GetDefaultCtx().threads ) { - cnt += v->ctxSwitchSamples.size(); + auto t = static_cast(v); + cnt += t->ctxSwitchSamples.size(); } return cnt; } @@ -2356,13 +2341,13 @@ const uint64_t* Worker::GetInlineSymbolList( uint64_t sym, uint32_t len ) return it; } -int64_t Worker::GetZoneEndImpl( const ZoneEvent& ev ) +int64_t Worker::GetZoneEndImpl( const ZoneEvent& ev ) const { assert( !ev.IsEndValid() ); auto ptr = &ev; for(;;) { - if( !ptr->HasChildren() ) return ptr->Start(); + if( !ptr->HasChildren() ) return ptr->Start() >= 0 ? ptr->Start() : m_data.lastTime; auto& children = GetZoneChildren( ptr->Child() ); if( children.is_magic() ) { @@ -2377,27 +2362,6 @@ int64_t Worker::GetZoneEndImpl( const ZoneEvent& ev ) } } -int64_t Worker::GetZoneEndImpl( const GpuEvent& ev ) -{ - assert( ev.GpuEnd() < 0 ); - auto ptr = &ev; - for(;;) - { - if( ptr->Child() < 0 ) return ptr->GpuStart() >= 0 ? ptr->GpuStart() : m_data.lastTime; - auto& children = GetGpuChildren( ptr->Child() ); - if( children.is_magic() ) - { - auto& c = *(Vector*)&children; - ptr = &c.back(); - } - else - { - ptr = children.back(); - } - if( ptr->GpuEnd() >= 0 ) return ptr->GpuEnd(); - } -} - uint32_t Worker::FindStringIdx( const char* str ) const { if( !str ) return 0; @@ -2498,7 +2462,7 @@ const char* Worker::GetThreadName( uint64_t id ) const bool Worker::IsThreadLocal( uint64_t id ) { - auto td = RetrieveThread( id ); + auto td = static_cast(RetrieveThread( id )); return td && ( td->count > 0 || !td->samples.empty() ); } @@ -2574,12 +2538,6 @@ const char* Worker::GetZoneName( const ZoneEvent& ev, const SourceLocation& srcl } } -const char* Worker::GetZoneName( const GpuEvent& ev ) const -{ - auto& srcloc = GetSourceLocation( ev.SrcLoc() ); - return GetZoneName( srcloc ); -} - static bool strstr_nocase( const char* l, const char* r ) { const auto lsz = strlen( l ); @@ -2649,20 +2607,33 @@ std::vector Worker::GetMatchingSourceLocation( const char* query, bool } #ifndef TRACY_NO_STATISTICS -Worker::SourceLocationZones& Worker::GetZonesForSourceLocation( int16_t srcloc ) +pair Worker::GetZonesForSourceLocation( int16_t srcloc ) { - assert( AreSourceLocationZonesReady() ); - static SourceLocationZones empty; - auto it = m_data.sourceLocationZones.find( srcloc ); - return it != m_data.sourceLocationZones.end() ? it->second : empty; + ZoneContext::SourceLocationZones* result = nullptr; + ZoneContext* ctxt = nullptr; + for( auto ctx : GetCtxData() ) + { + result = &ctx->GetZonesForSourceLocation( srcloc ); + if( !result->zones.empty() ) + { + ctxt = ctx; + break; + } + } + return { *result, ctxt }; } -const Worker::SourceLocationZones& Worker::GetZonesForSourceLocation( int16_t srcloc ) const -{ - assert( AreSourceLocationZonesReady() ); - static const SourceLocationZones empty; - auto it = m_data.sourceLocationZones.find( srcloc ); - return it != m_data.sourceLocationZones.end() ? it->second : empty; +bool Worker::AreSourceLocationZonesReady() const { + bool allReady = true; + for( auto ctx : GetCtxData() ) + { + if( !ctx->AreSourceLocationZonesReady() ) + { + allReady = false; + break; + } + } + return allReady; } const SymbolStats* Worker::GetSymbolStats( uint64_t symAddr ) const @@ -2871,7 +2842,7 @@ void Worker::Exec() if( m_data.mainThreadWantsLock ) { // Hand over the lock to the main thread to avoid starving it. - // Wait for a millisecond maximum to avoid the opposite + // Wait for a millisecond maximum to avoid the opposite // problem where main thread would never let us execute m_data.lockCv.wait_for( lk, std::chrono::milliseconds( 1 ) ); } @@ -2946,7 +2917,7 @@ void Worker::Exec() if( !m_crashed && !m_disconnect ) { bool done = true; - for( auto& v : m_data.threads ) + for( auto& v : GetDefaultCtx().threads ) { if( !v->stack.empty() ) { @@ -3411,13 +3382,9 @@ int16_t Worker::NewShrinkedSourceLocation( uint64_t srcloc ) const auto sz = int16_t( m_data.sourceLocationExpand.size() ); m_data.sourceLocationExpand.push_back( srcloc ); #ifndef TRACY_NO_STATISTICS - auto res = m_data.sourceLocationZones.emplace( sz, SourceLocationZones() ); - m_data.srclocZonesLast.first = sz; - m_data.srclocZonesLast.second = &res.first->second; + GetDefaultCtx().InitSourceLocationZones( sz ); #else - auto res = m_data.sourceLocationZonesCnt.emplace( sz, 0 ); - m_data.srclocCntLast.first = sz; - m_data.srclocCntLast.second = &res.first->second; + GetDefaultCtx().InitSourceLocationZonesCnt( sz ); #endif m_sourceLocationShrink.emplace( srcloc, sz ); m_data.shrinkSrclocLast.first = srcloc; @@ -3460,8 +3427,8 @@ void Worker::InsertMessageData( MessageData* msg ) ThreadData* Worker::NoticeThreadReal( uint64_t thread ) { - auto it = m_threadMap.find( thread ); - if( it != m_threadMap.end() ) + auto it = GetDefaultCtx().threadData.find( thread ); + if( it != GetDefaultCtx().threadData.end() ) { m_data.threadDataLast.first = thread; m_data.threadDataLast.second = it->second; @@ -3476,8 +3443,8 @@ ThreadData* Worker::NoticeThreadReal( uint64_t thread ) ThreadData* Worker::RetrieveThreadReal( uint64_t thread ) { - auto it = m_threadMap.find( thread ); - if( it != m_threadMap.end() ) + auto it = GetDefaultCtx().threadData.find( thread ); + if( it != GetDefaultCtx().threadData.end() ) { m_data.threadDataLast.first = thread; m_data.threadDataLast.second = it->second; @@ -3491,61 +3458,40 @@ ThreadData* Worker::RetrieveThreadReal( uint64_t thread ) ThreadData* Worker::GetCurrentThreadData() { - auto td = m_threadCtxData; - if( !td ) td = m_threadCtxData = NoticeThread( m_threadCtx ); + auto td = GetDefaultCtx().threadCtxData; + if( !td ) td = GetDefaultCtx().threadCtxData = NoticeThread( GetDefaultCtx().threadCtx ); if( td->fiber ) td = td->fiber; return td; } -#ifndef TRACY_NO_STATISTICS -Worker::SourceLocationZones* Worker::GetSourceLocationZonesReal( uint16_t srcloc ) -{ - auto it = m_data.sourceLocationZones.find( srcloc ); - assert( it != m_data.sourceLocationZones.end() ); - m_data.srclocZonesLast.first = srcloc; - m_data.srclocZonesLast.second = &it->second; - return &it->second; -} - -Worker::GpuSourceLocationZones* Worker::GetGpuSourceLocationZonesReal( uint16_t srcloc ) +const std::string& Worker::GetCtxName( ZoneContext* ctx ) const { - auto it = m_data.gpuSourceLocationZones.find( srcloc ); - if( it == m_data.gpuSourceLocationZones.end() ) + static std::string unknown = "Unknown"; + for( uint8_t idx = 0; idx < m_data.contexts.size(); idx++ ) { - it = m_data.gpuSourceLocationZones.emplace( srcloc, GpuSourceLocationZones() ).first; + if( ctx == m_ctxMap[idx] ) + { + return GetCtxName( idx ); + } } - m_data.gpuZonesLast.first = srcloc; - m_data.gpuZonesLast.second = &it->second; - return &it->second; -} -#else -uint64_t* Worker::GetSourceLocationZonesCntReal( uint16_t srcloc ) -{ - auto it = m_data.sourceLocationZonesCnt.find( srcloc ); - assert( it != m_data.sourceLocationZonesCnt.end() ); - m_data.srclocCntLast.first = srcloc; - m_data.srclocCntLast.second = &it->second; - return &it->second; + return unknown; } -uint64_t* Worker::GetGpuSourceLocationZonesCntReal( uint16_t srcloc ) +const std::string& Worker::GetCtxName( uint8_t idx ) const { - auto it = m_data.gpuSourceLocationZonesCnt.find( srcloc ); - if( it == m_data.gpuSourceLocationZonesCnt.end() ) + auto ctx = m_data.contexts[idx]; + if( ctx->longName.size() == 0 ) { - it = m_data.gpuSourceLocationZonesCnt.emplace( srcloc, 0 ).first; + std::stringstream ctxName; + ctxName << ZoneContextNames[(uint8_t)ctx->type]; + ctxName << ":" << (unsigned)idx; + if( ctx->name.Active() ) + { + ctxName << " " << GetString( ctx->name ); + } + ctx->longName = ctxName.str(); } - m_data.gpuCntLast.first = srcloc; - m_data.gpuCntLast.second = &it->second; - return &it->second; -} -#endif - -const ThreadData* Worker::GetThreadData( uint64_t tid ) const -{ - auto it = m_threadMap.find( tid ); - if( it == m_threadMap.end() ) return nullptr; - return it->second; + return ctx->longName; } const MemData& Worker::GetMemoryNamed( uint64_t name ) const @@ -3555,9 +3501,9 @@ const MemData& Worker::GetMemoryNamed( uint64_t name ) const return *it->second; } -ThreadData* Worker::NewThread( uint64_t thread, bool fiber, int32_t groupHint ) +CPUThreadData* Worker::NewThread( uint64_t thread, bool fiber, int32_t groupHint ) { - auto td = m_slab.AllocInit(); + auto td = m_slab.AllocInit(); td->id = thread; td->count = 0; td->nextZoneId = 0; @@ -3571,8 +3517,9 @@ ThreadData* Worker::NewThread( uint64_t thread, bool fiber, int32_t groupHint ) td->stackCount = (uint8_t*)m_slab.AllocBig( sizeof( uint8_t ) * 64*1024 ); memset( td->stackCount, 0, sizeof( uint8_t ) * 64*1024 ); td->groupHint = groupHint; - m_data.threads.push_back( td ); - m_threadMap.emplace( thread, td ); + td->ctx = &GetDefaultCtx(); + GetDefaultCtx().threads.push_back( td ); + GetDefaultCtx().threadData.emplace( thread, td ); m_data.threadDataLast.first = thread; m_data.threadDataLast.second = td; return td; @@ -3782,13 +3729,9 @@ void Worker::AddSourceLocationPayload( const char* data, size_t sz ) } const auto key = -int16_t( idx + 1 ); #ifndef TRACY_NO_STATISTICS - auto res = m_data.sourceLocationZones.emplace( key, SourceLocationZones() ); - m_data.srclocZonesLast.first = key; - m_data.srclocZonesLast.second = &res.first->second; + GetDefaultCtx().InitSourceLocationZones(key); #else - auto res = m_data.sourceLocationZonesCnt.emplace( key, 0 ); - m_data.srclocCntLast.first = key; - m_data.srclocCntLast.second = &res.first->second; + GetDefaultCtx().InitSoruceLocationZonesCnt(key); #endif } else @@ -4263,8 +4206,9 @@ void Worker::DoPostponedWork() if( m_identifySamples && m_data.newContextSwitchesReceived ) { - for( auto& td : m_data.threads ) + for( auto& thd : GetDefaultCtx().threads ) { + auto td = static_cast(thd); if( !td->postponedSamples.empty() ) { auto ctx = GetContextSwitchData( td->id ); @@ -4424,8 +4368,9 @@ void Worker::HandlePostponedGhostZones() assert( m_data.newFramesWereReceived ); if( !m_data.ghostZonesPostponed ) return; bool postponed = false; - for( auto& td : m_data.threads ) + for( auto& thd : GetDefaultCtx().threads ) { + auto td = static_cast(thd); while( td->ghostIdx != td->samples.size() ) { const auto& sample = td->samples[td->ghostIdx]; @@ -4800,11 +4745,11 @@ bool Worker::Process( const QueueItem& ev ) void Worker::ProcessThreadContext( const QueueThreadContext& ev ) { - m_refTimeThread = 0; - if( m_threadCtx != ev.thread ) + GetDefaultCtx().refTimeThread = 0; + if( GetDefaultCtx().threadCtx != ev.thread ) { - m_threadCtx = ev.thread; - m_threadCtxData = RetrieveThread( ev.thread ); + GetDefaultCtx().threadCtx = ev.thread; + GetDefaultCtx().threadCtxData = RetrieveThread( ev.thread ); } } @@ -4819,7 +4764,7 @@ void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ) { CheckSourceLocation( ev.srcloc ); - const auto start = TscTime( RefTime( m_refTimeThread, ev.time ) ); + const auto start = TscTime( RefTime( GetDefaultCtx().refTimeThread, ev.time ) ); zone->SetStartSrcLoc( start, ShrinkSourceLocation( ev.srcloc ) ); zone->SetEnd( -1 ); zone->SetChild( -1 ); @@ -4833,7 +4778,7 @@ void Worker::ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBe { assert( m_pendingSourceLocationPayload != 0 ); - const auto start = TscTime( RefTime( m_refTimeThread, ev.time ) ); + const auto start = TscTime( RefTime( GetDefaultCtx().refTimeThread, ev.time ) ); zone->SetStartSrcLoc( start, m_pendingSourceLocationPayload ); zone->SetEnd( -1 ); zone->SetChild( -1 ); @@ -4900,6 +4845,58 @@ void Worker::ProcessZoneBeginAllocSrcLocCallstack( const QueueZoneBeginLean& ev it->second = 0; } +#ifndef TRACY_NO_STATISTICS +void Worker::UpdateStats( ZoneEvent* zone, ThreadData& td, bool isReentry, int64_t timeEnd ) +{ + assert( !td.childTimeStack.empty() ); + const auto timeSpan = timeEnd - zone->Start(); + if( timeSpan > 0 ) + { + const auto ctid = CompressThread( td.id ); + ZoneContext::ZoneThreadData ztd; + ztd.SetZone( zone ); + ztd.SetThread( ctid ); + + auto slz = td.ctx->GetSourceLocationZones( zone->SrcLoc() ); + slz->zones.push_back( ztd ); + if( slz->min > timeSpan ) slz->min = timeSpan; + if( slz->max < timeSpan ) slz->max = timeSpan; + slz->total += timeSpan; + slz->sumSq += double( timeSpan ) * timeSpan; + const auto selfSpan = timeSpan - td.childTimeStack.back_and_pop(); + if( slz->selfMin > selfSpan ) slz->selfMin = selfSpan; + if( slz->selfMax < selfSpan ) slz->selfMax = selfSpan; + slz->selfTotal += selfSpan; + + if( !isReentry ) + { + slz->nonReentrantCount++; + if( slz->nonReentrantMin > timeSpan ) slz->nonReentrantMin = timeSpan; + if( slz->nonReentrantMax < timeSpan ) slz->nonReentrantMax = timeSpan; + slz->nonReentrantTotal += timeSpan; + } + if( !td.childTimeStack.empty() ) + { + td.childTimeStack.back() += timeSpan; + } + + auto it = slz->threadCnt.find( ctid ); + if( it == slz->threadCnt.end() ) + { + slz->threadCnt.emplace( ctid, 1 ); + } + else + { + it->second++; + } + } + else + { + td.childTimeStack.pop_back(); + } +} +#endif + void Worker::ProcessZoneEnd( const QueueZoneEnd& ev ) { auto td = GetCurrentThreadData(); @@ -4921,7 +4918,7 @@ void Worker::ProcessZoneEnd( const QueueZoneEnd& ev ) auto zone = stack.back_and_pop(); assert( zone->End() == -1 ); const auto isReentry = td->DecStackCount( zone->SrcLoc() ); - const auto timeEnd = TscTime( RefTime( m_refTimeThread, ev.time ) ); + const auto timeEnd = TscTime( RefTime( td->ctx->refTimeThread, ev.time ) ); zone->SetEnd( timeEnd ); assert( timeEnd >= zone->Start() ); @@ -4955,52 +4952,7 @@ void Worker::ProcessZoneEnd( const QueueZoneEnd& ev ) } #ifndef TRACY_NO_STATISTICS - assert( !td->childTimeStack.empty() ); - const auto timeSpan = timeEnd - zone->Start(); - if( timeSpan > 0 ) - { - const auto ctid = CompressThread( td->id ); - ZoneThreadData ztd; - ztd.SetZone( zone ); - ztd.SetThread( ctid ); - - auto slz = GetSourceLocationZones( zone->SrcLoc() ); - slz->zones.push_back( ztd ); - if( slz->min > timeSpan ) slz->min = timeSpan; - if( slz->max < timeSpan ) slz->max = timeSpan; - slz->total += timeSpan; - slz->sumSq += double( timeSpan ) * timeSpan; - const auto selfSpan = timeSpan - td->childTimeStack.back_and_pop(); - if( slz->selfMin > selfSpan ) slz->selfMin = selfSpan; - if( slz->selfMax < selfSpan ) slz->selfMax = selfSpan; - slz->selfTotal += selfSpan; - - if( !isReentry ) - { - slz->nonReentrantCount++; - if( slz->nonReentrantMin > timeSpan ) slz->nonReentrantMin = timeSpan; - if( slz->nonReentrantMax < timeSpan ) slz->nonReentrantMax = timeSpan; - slz->nonReentrantTotal += timeSpan; - } - if( !td->childTimeStack.empty() ) - { - td->childTimeStack.back() += timeSpan; - } - - auto it = slz->threadCnt.find( ctid ); - if( it == slz->threadCnt.end() ) - { - slz->threadCnt.emplace( ctid, 1 ); - } - else - { - it->second++; - } - } - else - { - td->childTimeStack.pop_back(); - } + UpdateStats( zone, *td, isReentry, timeEnd ); #else CountZoneStatistics( zone ); #endif @@ -5278,10 +5230,10 @@ void Worker::ProcessFrameImage( const QueueFrameImage& ev ) void Worker::ProcessZoneText() { - auto td = RetrieveThread( m_threadCtx ); + auto td = RetrieveThread( GetDefaultCtx().threadCtx ); if( !td ) { - ZoneTextFailure( m_threadCtx, m_pendingSingleString.ptr ); + ZoneTextFailure( GetDefaultCtx().threadCtx, m_pendingSingleString.ptr ); return; } if( td->fiber ) td = td->fiber; @@ -5325,10 +5277,10 @@ void Worker::ProcessZoneText() void Worker::ProcessZoneName() { - auto td = RetrieveThread( m_threadCtx ); + auto td = RetrieveThread( GetDefaultCtx().threadCtx ); if( !td ) { - ZoneNameFailure( m_threadCtx ); + ZoneNameFailure( GetDefaultCtx().threadCtx ); return; } if( td->fiber ) td = td->fiber; @@ -5347,10 +5299,10 @@ void Worker::ProcessZoneName() void Worker::ProcessZoneColor( const QueueZoneColor& ev ) { - auto td = RetrieveThread( m_threadCtx ); + auto td = RetrieveThread( GetDefaultCtx().threadCtx ); if( !td ) { - ZoneColorFailure( m_threadCtx ); + ZoneColorFailure( GetDefaultCtx().threadCtx ); return; } if( td->fiber ) td = td->fiber; @@ -5373,10 +5325,10 @@ void Worker::ProcessZoneValue( const QueueZoneValue& ev ) char tmp[64]; const auto tsz = sprintf( tmp, "%" PRIu64 " [0x%" PRIx64 "]", ev.value, ev.value ); - auto td = RetrieveThread( m_threadCtx ); + auto td = RetrieveThread( GetDefaultCtx().threadCtx ); if( !td ) { - ZoneValueFailure( m_threadCtx, ev.value ); + ZoneValueFailure( GetDefaultCtx().threadCtx, ev.value ); return; } if( td->fiber ) td = td->fiber; @@ -5599,7 +5551,7 @@ void Worker::ProcessPlotDataImpl( uint64_t name, int64_t evTime, double val ) Query( ServerQueryPlotName, name ); } ); - const auto time = TscTime( RefTime( m_refTimeThread, evTime ) ); + const auto time = TscTime( RefTime( GetDefaultCtx().refTimeThread, evTime ) ); if( m_data.lastTime < time ) m_data.lastTime = time; InsertPlot( plot, time, val ); } @@ -5726,10 +5678,22 @@ void Worker::ProcessMessageAppInfo( const QueueMessage& ev ) if( m_data.lastTime < time ) m_data.lastTime = time; } +void Worker::ProcessCpuNewContext( ) +{ + const uint8_t context_id = 0; + ZoneContext* ctx = m_slab.AllocInit(); + if( m_defaultCtx == UINT8_MAX ) + { + m_defaultCtx = context_id; + } + m_data.contexts.push_back( ctx ); + m_ctxMap[context_id] = ctx; +} + void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev ) { - assert( !m_gpuCtxMap[ev.context] ); - assert( ev.type != GpuContextType::Invalid ); + assert( !m_ctxMap[ev.context] ); + assert( ev.type != ZoneContextType::Invalid ); int64_t gpuTime; if( ev.period == 1.f ) @@ -5757,18 +5721,21 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev ) gpu->lastGpuTime = 0; gpu->overflow = 0; gpu->overflowMul = 0; - m_data.gpuData.push_back( gpu ); - m_gpuCtxMap[ev.context] = gpu; +#ifndef TRACY_NO_STATISTICS + gpu->SetSourceLocationZonesReady(); +#endif + m_data.contexts.push_back( gpu ); + m_ctxMap[ev.context] = gpu; } -void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev, bool serial ) +void Worker::ProcessGpuZoneBeginImpl( ZoneEvent* zone, const QueueGpuZoneBegin& ev, bool serial ) { CheckSourceLocation( ev.srcloc ); zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) ); ProcessGpuZoneBeginImplCommon( zone, ev, serial ); } -void Worker::ProcessGpuZoneBeginAllocSrcLocImpl( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ) +void Worker::ProcessGpuZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ) { assert( m_pendingSourceLocationPayload != 0 ); zone->SetSrcLoc( m_pendingSourceLocationPayload ); @@ -5776,13 +5743,20 @@ void Worker::ProcessGpuZoneBeginAllocSrcLocImpl( GpuEvent* zone, const QueueGpuZ m_pendingSourceLocationPayload = 0; } -void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ) +void Worker::ProcessGpuZoneBeginImplCommon( ZoneEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ) { m_data.gpuCnt++; - auto ctx = m_gpuCtxMap[ev.context].get(); + auto ctx = static_cast(m_ctxMap[ev.context].get()); assert( ctx ); + uint16_t srcloc = zone->SrcLoc(); + auto slz = ctx->sourceLocationZones.find( srcloc ); + if( slz == ctx->sourceLocationZones.end() ) + { + ctx->sourceLocationZones.emplace( srcloc, ZoneContext::SourceLocationZones() ); + } + int64_t cpuTime; if( serial ) { @@ -5790,29 +5764,31 @@ void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBe } else { - cpuTime = RefTime( m_refTimeThread, ev.cpuTime ); + cpuTime = RefTime( GetDefaultCtx().refTimeThread, ev.cpuTime ); } + auto& zoneExtra = GetZoneExtraMutable(*zone); + const auto time = TscTime( cpuTime ); - zone->SetCpuStart( time ); - zone->SetCpuEnd( -1 ); - zone->SetGpuStart( -1 ); - zone->SetGpuEnd( -1 ); - zone->callstack.SetVal( 0 ); + zoneExtra.otherStart.SetVal( time ); + zoneExtra.otherEnd.SetVal( -1 ); + zone->SetStart( -1 ); + zone->SetEnd( -1 ); + zoneExtra.callstack.SetVal(0); zone->SetChild( -1 ); - zone->query_id = ev.queryId; + zoneExtra.query_id = ev.queryId; uint64_t ztid; if( ctx->thread == 0 ) { // Vulkan, OpenCL and Direct3D 12 contexts are not bound to any single thread. - zone->SetThread( CompressThread( ev.thread ) ); + ctx->threadCtx = ev.thread; ztid = ev.thread; } else { // OpenGL and Direct3D11 doesn't need per-zone thread id. It still can be sent, // because it may be needed for callstack collection purposes. - zone->SetThread( 0 ); + ctx->threadCtx = 0; ztid = 0; } @@ -5821,19 +5797,25 @@ void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBe auto td = ctx->threadData.find( ztid ); if( td == ctx->threadData.end() ) { - td = ctx->threadData.emplace( ztid, GpuCtxThreadData {} ).first; + td = ctx->threadData.emplace( ztid, m_slab.AllocInit() ).first; + ctx->threads.push_back( td->second ); + td->second->ctx = ctx; + td->second->id = ztid; } - auto timeline = &td->second.timeline; - auto& stack = td->second.stack; + auto timeline = &td->second->timeline; + auto& stack = td->second->stack; +#ifndef TRACY_NO_STATISTICS + td->second->childTimeStack.push_back( 0 ); +#endif if( !stack.empty() ) { auto back = stack.back(); if( back->Child() < 0 ) { - back->SetChild( int32_t( m_data.gpuChildren.size() ) ); - m_data.gpuChildren.push_back( Vector>() ); + back->SetChild( int32_t( m_data.zoneChildren.size() ) ); + m_data.zoneChildren.push_back( Vector>() ); } - timeline = &m_data.gpuChildren[back->Child()]; + timeline = &m_data.zoneChildren[back->Child()]; } timeline->push_back( zone ); @@ -5845,18 +5827,20 @@ void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBe void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev, bool serial ) { - auto zone = m_slab.Alloc(); + auto zone = AllocZoneEvent(); + RequestZoneExtra(*zone); ProcessGpuZoneBeginImpl( zone, ev, serial ); } void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ) { - auto zone = m_slab.Alloc(); + auto zone = AllocZoneEvent(); + auto extra = RequestZoneExtra(*zone); ProcessGpuZoneBeginImpl( zone, ev, serial ); if( serial ) { assert( m_serialNextCallstack != 0 ); - zone->callstack.SetVal( m_serialNextCallstack ); + extra.callstack.SetVal( m_serialNextCallstack ); m_serialNextCallstack = 0; } else @@ -5864,25 +5848,27 @@ void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool ser auto td = GetCurrentThreadData(); auto it = m_nextCallstack.find( td->id ); assert( it != m_nextCallstack.end() ); - zone->callstack.SetVal( it->second ); + extra.callstack.SetVal( it->second ); it->second = 0; } } void Worker::ProcessGpuZoneBeginAllocSrcLoc( const QueueGpuZoneBeginLean& ev, bool serial ) { - auto zone = m_slab.Alloc(); + auto zone = AllocZoneEvent(); + RequestZoneExtra(*zone); ProcessGpuZoneBeginAllocSrcLocImpl( zone, ev, serial ); } void Worker::ProcessGpuZoneBeginAllocSrcLocCallstack( const QueueGpuZoneBeginLean& ev, bool serial ) { - auto zone = m_slab.Alloc(); + auto zone = AllocZoneEvent(); + auto extra = RequestZoneExtra(*zone); ProcessGpuZoneBeginAllocSrcLocImpl( zone, ev, serial ); if( serial ) { assert( m_serialNextCallstack != 0 ); - zone->callstack.SetVal( m_serialNextCallstack ); + extra.callstack.SetVal( m_serialNextCallstack ); m_serialNextCallstack = 0; } else @@ -5890,21 +5876,22 @@ void Worker::ProcessGpuZoneBeginAllocSrcLocCallstack( const QueueGpuZoneBeginLea auto td = GetCurrentThreadData(); auto it = m_nextCallstack.find( td->id ); assert( it != m_nextCallstack.end() ); - zone->callstack.SetVal( it->second ); + extra.callstack.SetVal( it->second ); it->second = 0; } } void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ) { - auto ctx = m_gpuCtxMap[ev.context]; + auto ctx = static_cast(m_ctxMap[ev.context].get()); assert( ctx ); auto td = ctx->threadData.find( ev.thread ); assert( td != ctx->threadData.end() ); - assert( !td->second.stack.empty() ); - auto zone = td->second.stack.back_and_pop(); + assert( !td->second->stack.empty() ); + auto zone = td->second->stack.back_and_pop(); + auto& extra = GetZoneExtraMutable(*zone); assert( !ctx->query[ev.queryId] ); ctx->query[ev.queryId] = zone; @@ -5916,16 +5903,16 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ) } else { - cpuTime = RefTime( m_refTimeThread, ev.cpuTime ); + cpuTime = RefTime( GetDefaultCtx().refTimeThread, ev.cpuTime ); } const auto time = TscTime( cpuTime ); - zone->SetCpuEnd( time ); + extra.otherEnd.SetVal( time ); if( m_data.lastTime < time ) m_data.lastTime = time; } void Worker::ProcessGpuTime( const QueueGpuTime& ev ) { - auto ctx = m_gpuCtxMap[ev.context]; + auto ctx = static_cast(m_ctxMap[ev.context].get()); assert( ctx ); int64_t tgpu = RefTime( m_refTimeGpu, ev.gpuTime ); @@ -5971,29 +5958,17 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) assert( zone ); ctx->query[ev.queryId] = nullptr; - if( zone->GpuStart() < 0 ) + if( zone->Start() < 0 ) { - zone->SetGpuStart( gpuTime ); + zone->SetStart( gpuTime ); ctx->count++; } else { - zone->SetGpuEnd( gpuTime ); + zone->SetEnd( gpuTime ); #ifndef TRACY_NO_STATISTICS - const auto gpuStart = zone->GpuStart(); - const auto timeSpan = gpuTime - gpuStart; - if( timeSpan > 0 ) - { - GpuZoneThreadData ztd; - ztd.SetZone( zone ); - ztd.SetThread( zone->Thread() ); - auto slz = GetGpuSourceLocationZones( zone->SrcLoc() ); - slz->zones.push_back( ztd ); - if( slz->min > timeSpan ) slz->min = timeSpan; - if( slz->max < timeSpan ) slz->max = timeSpan; - slz->total += timeSpan; - slz->sumSq += double( timeSpan ) * timeSpan; - } + // TODO: reentry can be supported here probably + UpdateStats( zone, *ctx->threadData.at( ctx->threadCtx ), false, gpuTime ); #else CountZoneStatistics( zone ); #endif @@ -6003,7 +5978,7 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev ) { - auto ctx = m_gpuCtxMap[ev.context]; + auto ctx = static_cast(m_ctxMap[ev.context].get()); assert( ctx ); assert( ctx->hasCalibration ); @@ -6026,7 +6001,7 @@ void Worker::ProcessGpuCalibration( const QueueGpuCalibration& ev ) void Worker::ProcessGpuTimeSync( const QueueGpuTimeSync& ev ) { - auto ctx = m_gpuCtxMap[ev.context]; + auto ctx = static_cast(m_ctxMap[ev.context].get()); assert( ctx ); int64_t gpuTime; @@ -6049,7 +6024,7 @@ void Worker::ProcessGpuTimeSync( const QueueGpuTimeSync& ev ) void Worker::ProcessGpuContextName( const QueueGpuContextName& ev ) { - auto ctx = m_gpuCtxMap[ev.context]; + auto ctx = static_cast(m_ctxMap[ev.context].get()); assert( ctx ); const auto idx = GetSingleStringIdx(); ctx->name = StringIdx( idx ); @@ -6057,7 +6032,7 @@ void Worker::ProcessGpuContextName( const QueueGpuContextName& ev ) void Worker::ProcessGpuAnnotationName( const QueueGpuAnnotationName& ev ) { - auto ctx = m_gpuCtxMap[ev.context]; + auto ctx = static_cast(m_ctxMap[ev.context].get()); assert( ctx ); const auto idx = GetSingleStringIdx(); ctx->noteNames[ev.noteId] = StringIdx( idx ); @@ -6065,7 +6040,7 @@ void Worker::ProcessGpuAnnotationName( const QueueGpuAnnotationName& ev ) void Worker::ProcessGpuZoneAnnotation( const QueueGpuZoneAnnotation& ev ) { - auto ctx = m_gpuCtxMap[ev.context]; + auto ctx = static_cast(m_ctxMap[ev.context].get()); assert( ctx ); auto note = ctx->notes.find( ev.queryId ); if( note == ctx->notes.end() ) { @@ -6321,7 +6296,7 @@ void Worker::ProcessCallstack() m_pendingCallstackId = 0; } -void Worker::ProcessCallstackSampleInsertSample( const SampleData& sd, ThreadData& td ) +void Worker::ProcessCallstackSampleInsertSample( const SampleData& sd, CPUThreadData& td ) { const auto t = sd.time.Val(); if( td.samples.empty() ) @@ -6358,7 +6333,7 @@ void Worker::ProcessCallstackSampleInsertSample( const SampleData& sd, ThreadDat m_data.samplesCnt++; } -void Worker::ProcessCallstackSampleImpl( const SampleData& sd, ThreadData& td ) +void Worker::ProcessCallstackSampleImpl( const SampleData& sd, CPUThreadData& td ) { ProcessCallstackSampleInsertSample( sd, td ); @@ -6401,7 +6376,7 @@ void Worker::ProcessCallstackSampleImpl( const SampleData& sd, ThreadData& td ) } #ifndef TRACY_NO_STATISTICS -void Worker::ProcessCallstackSampleImplStats( const SampleData& sd, ThreadData& td ) +void Worker::ProcessCallstackSampleImplStats( const SampleData& sd, CPUThreadData& td ) { const auto t = sd.time.Val(); const auto callstack = sd.callstack.Val(); @@ -6515,7 +6490,7 @@ void Worker::ProcessCallstackSample( const QueueCallstackSample& ev ) const auto t = refTime == 0 ? 0 : TscTime( refTime ); if( m_data.lastTime < t ) m_data.lastTime = t; - auto& td = *NoticeThread( ev.thread ); + auto& td = *static_cast(NoticeThread( ev.thread )); SampleData sd; sd.time.SetVal( t ); @@ -6560,7 +6535,7 @@ void Worker::ProcessCallstackSampleContextSwitch( const QueueCallstackSample& ev const auto t = refTime == 0 ? 0 : TscTime( refTime ); if( m_data.lastTime < t ) m_data.lastTime = t; - auto& td = *NoticeThread( ev.thread ); + auto& td = *static_cast(NoticeThread( ev.thread )); SampleData sd; sd.time.SetVal( t ); @@ -6918,7 +6893,7 @@ void Worker::ProcessContextSwitch( const QueueContextSwitch& ev ) if ( data.size() > 1 ) { // Sometimes the OS tell us it scheduled a thread that was still alive but on the - // verge of being switched out. We thus end up with `wakeup < switchout`. + // verge of being switched out. We thus end up with `wakeup < switchout`. // So instead, compare with the previous wakeup. const auto previousWakeup = data[data.size() - 2].WakeupVal(); if ( previousWakeup <= wakeupTime && wakeupTime <= time ) @@ -7101,7 +7076,7 @@ void Worker::ProcessThreadGroupHint( const QueueThreadGroupHint& ev ) void Worker::ProcessFiberEnter( const QueueFiberEnter& ev ) { - const auto t = TscTime( RefTime( m_refTimeThread, ev.time ) ); + const auto t = TscTime( RefTime( GetDefaultCtx().refTimeThread, ev.time ) ); if( m_data.lastTime < t ) m_data.lastTime = t; uint64_t tid; @@ -7151,7 +7126,7 @@ void Worker::ProcessFiberEnter( const QueueFiberEnter& ev ) void Worker::ProcessFiberLeave( const QueueFiberLeave& ev ) { - const auto t = TscTime( RefTime( m_refTimeThread, ev.time ) ); + const auto t = TscTime( RefTime( GetDefaultCtx().refTimeThread, ev.time ) ); if( m_data.lastTime < t ) m_data.lastTime = t; auto td = RetrieveThread( ev.thread ); @@ -7638,14 +7613,14 @@ void Worker::UpdateSampleStatisticsImpl( const CallstackFrameData** frames, uint } #endif -int64_t Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, int64_t refTime, int32_t& childIdx ) +int64_t Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, ZoneContext* ctx, int64_t refTime, int32_t& childIdx ) { uint32_t sz; f.Read( sz ); - return ReadTimelineHaveSize( f, zone, refTime, childIdx, sz ); + return ReadTimelineHaveSize( f, zone, ctx, refTime, childIdx, sz ); } -int64_t Worker::ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t refTime, int32_t& childIdx, uint32_t sz ) +int64_t Worker::ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, ZoneContext* ctx, int64_t refTime, int32_t& childIdx, uint32_t sz ) { if( sz == 0 ) { @@ -7657,43 +7632,21 @@ int64_t Worker::ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t refT const auto idx = childIdx; childIdx++; zone->SetChild( idx ); - return ReadTimeline( f, m_data.zoneChildren[idx], sz, refTime, childIdx ); - } -} - -void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ) -{ - uint64_t sz; - f.Read( sz ); - ReadTimelineHaveSize( f, zone, refTime, refGpuTime, childIdx, sz, hasQueryId ); -} - -void Worker::ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz, bool hasQueryId ) -{ - if( sz == 0 ) - { - zone->SetChild( -1 ); - } - else - { - const auto idx = childIdx; - childIdx++; - zone->SetChild( idx ); - ReadTimeline( f, m_data.gpuChildren[idx], sz, refTime, refGpuTime, childIdx, hasQueryId ); + return ReadTimeline( f, m_data.zoneChildren[idx], ctx, sz, refTime, childIdx ); } } #ifndef TRACY_NO_STATISTICS -void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread ) +void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, ZoneContext& ctx, uint16_t thread ) { assert( zone.IsEndValid() ); auto timeSpan = zone.End() - zone.Start(); if( timeSpan > 0 ) { - auto it = m_data.sourceLocationZones.find( zone.SrcLoc() ); - assert( it != m_data.sourceLocationZones.end() ); + auto it = ctx.sourceLocationZones.find( zone.SrcLoc() ); + assert( it != ctx.sourceLocationZones.end() ); - ZoneThreadData ztd; + ZoneContext::ZoneThreadData ztd; ztd.SetZone( &zone ); ztd.SetThread( thread ); @@ -7740,43 +7693,15 @@ void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint } } -void Worker::ReconstructZoneStatistics( GpuEvent& zone, uint16_t thread ) -{ - assert( zone.GpuEnd() >= 0 ); - auto timeSpan = zone.GpuEnd() - zone.GpuStart(); - if( timeSpan > 0 ) - { - auto it = m_data.gpuSourceLocationZones.find( zone.SrcLoc() ); - if( it == m_data.gpuSourceLocationZones.end() ) - { - it = m_data.gpuSourceLocationZones.emplace( zone.SrcLoc(), GpuSourceLocationZones {} ).first; - } - GpuZoneThreadData ztd; - ztd.SetZone( &zone ); - ztd.SetThread( thread ); - auto& slz = it->second; - slz.zones.push_back( ztd ); - if( slz.min > timeSpan ) slz.min = timeSpan; - if( slz.max < timeSpan ) slz.max = timeSpan; - slz.total += timeSpan; - slz.sumSq += double( timeSpan ) * timeSpan; - } -} #else -void Worker::CountZoneStatistics( ZoneEvent* zone ) -{ - auto cnt = GetSourceLocationZonesCnt( zone->SrcLoc() ); - (*cnt)++; -} - -void Worker::CountZoneStatistics( GpuEvent* zone ) +void Worker::CountZoneStatistics( ZoneEvent* zone, ZoneContext* ctx ) { - auto cnt = GetGpuSourceLocationZonesCnt( zone->SrcLoc() ); + auto cnt = ctx->GetSourceLocationZonesCnt( zone->SrcLoc() ); (*cnt)++; } #endif -int64_t Worker::ReadTimeline( FileRead& f, Vector>& _vec, uint32_t size, int64_t refTime, int32_t& childIdx ) +int64_t Worker::ReadTimeline( FileRead& f, Vector>& _vec, ZoneContext* ctx, uint32_t size, int64_t refTime, int32_t& childIdx ) { assert( size != 0 ); const auto lp = s_loadProgress.subProgress.load( std::memory_order_relaxed ); @@ -7797,7 +7722,7 @@ int64_t Worker::ReadTimeline( FileRead& f, Vector>& _vec, u refTime += tstart; zone->SetStartSrcLoc( refTime, srcloc ); zone->extra = extra; - refTime = ReadTimelineHaveSize( f, zone, refTime, childIdx, childSz ); + refTime = ReadTimelineHaveSize( f, zone, ctx, refTime, childIdx, childSz ); f.Read5( tend, srcloc, tstart, extra, childSz ); refTime += tend; zone->SetEnd( refTime ); @@ -7810,7 +7735,7 @@ int64_t Worker::ReadTimeline( FileRead& f, Vector>& _vec, u refTime += tstart; zone->SetStartSrcLoc( refTime, srcloc ); zone->extra = extra; - refTime = ReadTimelineHaveSize( f, zone, refTime, childIdx, childSz ); + refTime = ReadTimelineHaveSize( f, zone, ctx, refTime, childIdx, childSz ); f.Read( tend ); refTime += tend; zone->SetEnd( refTime ); @@ -7821,42 +7746,6 @@ int64_t Worker::ReadTimeline( FileRead& f, Vector>& _vec, u return refTime; } -void Worker::ReadTimeline( FileRead& f, Vector>& _vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ) -{ - assert( size != 0 ); - const auto lp = s_loadProgress.subProgress.load( std::memory_order_relaxed ); - s_loadProgress.subProgress.store( lp + size, std::memory_order_relaxed ); - auto& vec = *(Vector*)( &_vec ); - vec.set_magic(); - vec.reserve_exact( size, m_slab ); - auto zone = vec.begin(); - auto end = vec.end(); - do - { - int64_t tcpu, tgpu; - int16_t srcloc; - uint16_t thread; - uint64_t childSz; - f.Read6( tcpu, tgpu, srcloc, zone->callstack, thread, childSz ); - zone->SetSrcLoc( srcloc ); - zone->SetThread( thread ); - refTime += tcpu; - refGpuTime += tgpu; - zone->SetCpuStart( refTime ); - zone->SetGpuStart( refGpuTime ); - - ReadTimelineHaveSize( f, zone, refTime, refGpuTime, childIdx, childSz, hasQueryId ); - - f.Read2( tcpu, tgpu ); - refTime += tcpu; - refGpuTime += tgpu; - zone->SetCpuEnd( refTime ); - zone->SetGpuEnd( refGpuTime ); - if( hasQueryId ) f.Read( zone->query_id ); - } - while( ++zone != end ); -} - void Worker::Disconnect() { //Query( ServerQueryDisconnect, 0 ); @@ -7913,6 +7802,15 @@ void Worker::Write( FileWrite& f, bool fiDict ) f.Write( &sz, sizeof( sz ) ); f.Write( m_hostInfo.c_str(), sz ); + sz = m_data.contexts.size(); + f.Write( &sz, sizeof( sz ) ); + for( auto& ctx : m_data.contexts ) + { + ZoneContextType type = ctx->type; + f.Write( &type, sizeof( type ) ); + } + f.Write( &m_defaultCtx, sizeof( m_defaultCtx ) ); + sz = m_data.cpuTopology.size(); f.Write( &sz, sizeof( sz ) ); for( auto& package : m_data.cpuTopology ) @@ -8034,44 +7932,30 @@ void Worker::Write( FileWrite& f, bool fiDict ) } #ifndef TRACY_NO_STATISTICS - sz = m_data.sourceLocationZones.size(); - f.Write( &sz, sizeof( sz ) ); - for( auto& v : m_data.sourceLocationZones ) - { - int16_t id = v.first; - uint64_t cnt = v.second.zones.size(); - f.Write( &id, sizeof( id ) ); - f.Write( &cnt, sizeof( cnt ) ); - } - - sz = m_data.gpuSourceLocationZones.size(); - f.Write( &sz, sizeof( sz ) ); - for( auto& v : m_data.gpuSourceLocationZones ) + for( auto ctx : m_data.contexts ) { - int16_t id = v.first; - uint64_t cnt = v.second.zones.size(); - f.Write( &id, sizeof( id ) ); - f.Write( &cnt, sizeof( cnt ) ); + sz = ctx->sourceLocationZones.size(); + f.Write( &sz, sizeof( sz ) ); + for( auto& v : ctx->sourceLocationZones ) + { + int16_t id = v.first; + uint64_t cnt = v.second.zones.size(); + f.Write( &id, sizeof( id ) ); + f.Write( &cnt, sizeof( cnt ) ); + } } #else - sz = m_data.sourceLocationZonesCnt.size(); - f.Write( &sz, sizeof( sz ) ); - for( auto& v : m_data.sourceLocationZonesCnt ) - { - int16_t id = v.first; - uint64_t cnt = v.second; - f.Write( &id, sizeof( id ) ); - f.Write( &cnt, sizeof( cnt ) ); - } - - sz = m_data.gpuSourceLocationZonesCnt.size(); - f.Write( &sz, sizeof( sz ) ); - for( auto& v : m_data.gpuSourceLocationZonesCnt ) + for( auto ctx : m_data.contexts ) { - int16_t id = v.first; - uint64_t cnt = v.second; - f.Write( &id, sizeof( id ) ); - f.Write( &cnt, sizeof( cnt ) ); + sz = ctx->sourceLocationZonesCnt.size(); + f.Write( &sz, sizeof( sz ) ); + for( auto& v : ctx->sourceLocationZonesCnt ) + { + int16_t id = v.first; + uint64_t cnt = v.second; + f.Write( &id, sizeof( id ) ); + f.Write( &cnt, sizeof( cnt ) ); + } } #endif @@ -8125,14 +8009,15 @@ void Worker::Write( FileWrite& f, bool fiDict ) f.Write( m_data.zoneExtra.data(), sz * sizeof( ZoneExtra ) ); sz = 0; - for( auto& v : m_data.threads ) sz += v->count; + for( auto& v : GetDefaultCtx().threads ) sz += v->count; f.Write( &sz, sizeof( sz ) ); sz = m_data.zoneChildren.size(); f.Write( &sz, sizeof( sz ) ); - sz = m_data.threads.size(); + sz = GetDefaultCtx().threads.size(); f.Write( &sz, sizeof( sz ) ); - for( auto& thread : m_data.threads ) + for( auto& td : GetDefaultCtx().threads ) { + auto thread = static_cast(td); int64_t refTime = 0; f.Write( &thread->id, sizeof( thread->id ) ); f.Write( &thread->count, sizeof( thread->count ) ); @@ -8174,14 +8059,12 @@ void Worker::Write( FileWrite& f, bool fiDict ) } sz = 0; - for( auto& v : m_data.gpuData ) sz += v->count; - f.Write( &sz, sizeof( sz ) ); - sz = m_data.gpuChildren.size(); + for( auto& v : m_data.contexts ) sz += v->count; f.Write( &sz, sizeof( sz ) ); - sz = m_data.gpuData.size(); - f.Write( &sz, sizeof( sz ) ); - for( auto& ctx : m_data.gpuData ) + for( auto& cntx : m_data.contexts ) { + if (cntx->type == ZoneContextType::CPU) continue; + auto ctx = static_cast(cntx); f.Write( &ctx->thread, sizeof( ctx->thread ) ); uint8_t calibration = ctx->hasCalibration; f.Write( &calibration, sizeof( calibration ) ); @@ -8205,7 +8088,7 @@ void Worker::Write( FileWrite& f, bool fiDict ) int64_t refGpuTime = 0; uint64_t tid = td.first; f.Write( &tid, sizeof( tid ) ); - WriteTimeline( f, td.second.timeline, refTime, refGpuTime ); + WriteTimeline( f, td.second->timeline, refTime ); } sz = ctx->notes.size(); @@ -8403,7 +8286,7 @@ void Worker::Write( FileWrite& f, bool fiDict ) ctxValid.reserve( m_data.ctxSwitch.size() ); for( auto it = m_data.ctxSwitch.begin(); it != m_data.ctxSwitch.end(); ++it ) { - auto td = RetrieveThread( it->first ); + auto td = static_cast(RetrieveThread( it->first )); if( td && ( td->count > 0 || !td->samples.empty() ) ) { ctxValid.emplace_back( it ); @@ -8560,51 +8443,6 @@ void Worker::WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime ) } } -void Worker::WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime, int64_t& refGpuTime ) -{ - uint64_t sz = vec.size(); - f.Write( &sz, sizeof( sz ) ); - if( vec.is_magic() ) - { - WriteTimelineImpl>( f, *(Vector*)( &vec ), refTime, refGpuTime ); - } - else - { - WriteTimelineImpl>( f, vec, refTime, refGpuTime ); - } -} - -template -void Worker::WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime, int64_t& refGpuTime ) -{ - Adapter a; - for( auto& val : vec ) - { - auto& v = a(val); - WriteTimeOffset( f, refTime, v.CpuStart() ); - WriteTimeOffset( f, refGpuTime, v.GpuStart() ); - const int16_t srcloc = v.SrcLoc(); - f.Write( &srcloc, sizeof( srcloc ) ); - f.Write( &v.callstack, sizeof( v.callstack ) ); - const uint16_t thread = v.Thread(); - f.Write( &thread, sizeof( thread ) ); - - if( v.Child() < 0 ) - { - const uint64_t sz = 0; - f.Write( &sz, sizeof( sz ) ); - } - else - { - WriteTimeline( f, GetGpuChildren( v.Child() ), refTime, refGpuTime ); - } - - WriteTimeOffset( f, refTime, v.CpuEnd() ); - WriteTimeOffset( f, refGpuTime, v.GpuEnd() ); - f.Write( &v.query_id, sizeof( v.query_id ) ); - } -} - static const char* s_failureReasons[] = { "", "Invalid order of zone begin and end events.", diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 2aa09a0eb3..7a410b827a 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -19,6 +19,7 @@ #include "../public/common/TracySocket.hpp" #include "tracy_robin_hood.h" #include "TracyEvent.hpp" +#include "TracyContext.hpp" #include "TracyShortPtr.hpp" #include "TracySlab.hpp" #include "TracyStringDiscovery.hpp" @@ -123,28 +124,6 @@ class Worker std::vector> data; }; - struct ZoneThreadData - { - tracy_force_inline ZoneEvent* Zone() const { return (ZoneEvent*)( _zone_thread >> 16 ); } - tracy_force_inline void SetZone( ZoneEvent* zone ) { auto z64 = (uint64_t)zone; assert( ( z64 & 0xFFFF000000000000 ) == 0 ); memcpy( ((char*)&_zone_thread)+2, &z64, 4 ); memcpy( ((char*)&_zone_thread)+6, ((char*)&z64)+4, 2 ); } - tracy_force_inline uint16_t Thread() const { return uint16_t( _zone_thread & 0xFFFF ); } - tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_zone_thread, &thread, 2 ); } - - uint64_t _zone_thread; - }; - enum { ZoneThreadDataSize = sizeof( ZoneThreadData ) }; - - struct GpuZoneThreadData - { - tracy_force_inline GpuEvent* Zone() const { return (GpuEvent*)( _zone_thread >> 16 ); } - tracy_force_inline void SetZone( GpuEvent* zone ) { auto z64 = (uint64_t)zone; assert( ( z64 & 0xFFFF000000000000 ) == 0 ); memcpy( ((char*)&_zone_thread)+2, &z64, 4 ); memcpy( ((char*)&_zone_thread)+6, ((char*)&z64)+4, 2 ); } - tracy_force_inline uint16_t Thread() const { return uint16_t( _zone_thread & 0xFFFF ); } - tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_zone_thread, &thread, 2 ); } - - uint64_t _zone_thread; - }; - enum { GpuZoneThreadDataSize = sizeof( GpuZoneThreadData ) }; - struct CpuThreadTopology { uint32_t package; @@ -196,36 +175,6 @@ class Worker }; private: - struct SourceLocationZones - { - struct ZtdSort { bool operator()( const ZoneThreadData& lhs, const ZoneThreadData& rhs ) const { return lhs.Zone()->Start() < rhs.Zone()->Start(); } }; - - SortedVector zones; - int64_t min = std::numeric_limits::max(); - int64_t max = std::numeric_limits::min(); - int64_t total = 0; - double sumSq = 0; - int64_t selfMin = std::numeric_limits::max(); - int64_t selfMax = std::numeric_limits::min(); - int64_t selfTotal = 0; - size_t nonReentrantCount = 0; - int64_t nonReentrantMin = std::numeric_limits::max(); - int64_t nonReentrantMax = std::numeric_limits::min(); - int64_t nonReentrantTotal = 0; - unordered_flat_map threadCnt; - }; - - struct GpuSourceLocationZones - { - struct GpuZtdSort { bool operator()( const GpuZoneThreadData& lhs, const GpuZoneThreadData& rhs ) const { return lhs.Zone()->GpuStart() < rhs.Zone()->GpuStart(); } }; - - SortedVector zones; - int64_t min = std::numeric_limits::max(); - int64_t max = std::numeric_limits::min(); - int64_t total = 0; - double sumSq = 0; - }; - struct CallstackFrameIdHash { size_t operator()( const CallstackFrameId& id ) const { return id.data; } @@ -282,10 +231,9 @@ class Worker std::mutex lock; StringDiscovery frames; FrameData* framesBase; - Vector gpuData; + Vector contexts; Vector> messages; StringDiscovery plots; - Vector threads; Vector zoneExtra; MemData* memory; unordered_flat_map memNameMap; @@ -310,15 +258,6 @@ class Worker Vector> sourceLocationPayload; unordered_flat_map sourceLocationPayloadMap; Vector sourceLocationExpand; -#ifndef TRACY_NO_STATISTICS - unordered_flat_map sourceLocationZones; - bool sourceLocationZonesReady = false; - unordered_flat_map gpuSourceLocationZones; - bool gpuSourceLocationZonesReady = false; -#else - unordered_flat_map sourceLocationZonesCnt; - unordered_flat_map gpuSourceLocationZonesCnt; -#endif unordered_flat_map*, uint32_t, VarArrayHasher, VarArrayComparator> callstackMap; Vector>> callstackPayload; @@ -357,7 +296,6 @@ class Worker ThreadCompress externalThreadCompress; Vector>> zoneChildren; - Vector>> gpuChildren; #ifndef TRACY_NO_STATISTICS Vector> ghostChildren; Vector ghostFrames; @@ -382,13 +320,6 @@ class Worker std::pair ctxSwitchLast = std::make_pair( std::numeric_limits::max(), nullptr ); uint64_t checkSrclocLast = 0; std::pair shrinkSrclocLast = std::make_pair( std::numeric_limits::max(), 0 ); -#ifndef TRACY_NO_STATISTICS - std::pair srclocZonesLast = std::make_pair( 0, nullptr ); - std::pair gpuZonesLast = std::make_pair( 0, nullptr ); -#else - std::pair srclocCntLast = std::make_pair( 0, nullptr ); - std::pair gpuCntLast = std::make_pair( 0, nullptr ); -#endif #ifndef TRACY_NO_STATISTICS Vector ctxUsage; @@ -561,10 +492,12 @@ class Worker const unordered_flat_map& GetLockMap() const { return m_data.lockMap; } const Vector>& GetMessages() const { return m_data.messages; } - const Vector& GetGpuData() const { return m_data.gpuData; } + tracy_force_inline const ZoneContext& GetDefaultCtx() const { assert(m_defaultCtx < UINT8_MAX); return *m_ctxMap[m_defaultCtx]; } + tracy_force_inline ZoneContext& GetDefaultCtx() { assert(m_defaultCtx < UINT8_MAX); return *m_ctxMap[m_defaultCtx]; } + const Vector& GetCtxData() const { return m_data.contexts; } + const std::string& GetCtxName( ZoneContext* ctx ) const; + const std::string& GetCtxName( uint8_t idx ) const; const Vector& GetPlots() const { return m_data.plots.Data(); } - const Vector& GetThreadData() const { return m_data.threads; } - const ThreadData* GetThreadData( uint64_t tid ) const; const MemData& GetMemoryNamed( uint64_t name ) const; const unordered_flat_map& GetMemNameMap() const { return m_data.memNameMap; } const Vector>& GetFrameImages() const { return m_data.frameImage; } @@ -587,6 +520,7 @@ class Worker unordered_flat_map& GetCallstackFrameMap() { return m_data.callstackFrameMap; } #ifndef TRACY_NO_STATISTICS + void UpdateStats( ZoneEvent* zone, ThreadData& td, bool isReentry, int64_t timeEnd ); const VarArray& GetParentCallstack( uint32_t idx ) const { return *m_data.parentCallstackPayload[idx]; } const CallstackFrameData* GetParentCallstackFrame( const CallstackFrameId& ptr ) const; const Vector* GetSamplesForSymbol( uint64_t symAddr ) const; @@ -599,10 +533,8 @@ class Worker // GetZoneEnd() will try to infer the end time by looking at child zones (parent zone can't end // before its children have ended). // GetZoneEndDirect() will only return zone's direct timing data, without looking at children. - tracy_force_inline int64_t GetZoneEnd( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : GetZoneEndImpl( ev ); } - tracy_force_inline int64_t GetZoneEnd( const GpuEvent& ev ) { return ev.GpuEnd() >= 0 ? ev.GpuEnd() : GetZoneEndImpl( ev ); } + tracy_force_inline int64_t GetZoneEnd( const ZoneEvent& ev ) const { return ev.IsEndValid() ? ev.End() : GetZoneEndImpl( ev ); } static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : ev.Start(); } - static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.GpuEnd() >= 0 ? ev.GpuEnd() : ev.GpuStart(); } uint32_t FindStringIdx( const char* str ) const; const char* GetString( uint64_t ptr ) const; @@ -617,10 +549,9 @@ class Worker const char* GetZoneName( const SourceLocation& srcloc ) const; const char* GetZoneName( const ZoneEvent& ev ) const; const char* GetZoneName( const ZoneEvent& ev, const SourceLocation& srcloc ) const; - const char* GetZoneName( const GpuEvent& ev ) const; tracy_force_inline const Vector>& GetZoneChildren( int32_t idx ) const { return m_data.zoneChildren[idx]; } - tracy_force_inline const Vector>& GetGpuChildren( int32_t idx ) const { return m_data.gpuChildren[idx]; } + tracy_force_inline Vector>& GetZoneChildren( int32_t idx ) { return m_data.zoneChildren[idx]; } #ifndef TRACY_NO_STATISTICS tracy_force_inline const Vector& GetGhostChildren( int32_t idx ) const { return m_data.ghostChildren[idx]; } tracy_force_inline const GhostKey& GetGhostFrame( const Int24& frame ) const { return m_data.ghostFrames[frame.Val()]; } @@ -634,12 +565,9 @@ class Worker const unordered_flat_map& GetSymbolMap() const { return m_data.symbolMap; } #ifndef TRACY_NO_STATISTICS - SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ); - const SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ) const; - const unordered_flat_map& GetSourceLocationZones() const { return m_data.sourceLocationZones; } - const unordered_flat_map& GetGpuSourceLocationZones() const { return m_data.gpuSourceLocationZones; } - bool AreSourceLocationZonesReady() const { return m_data.sourceLocationZonesReady; } - bool AreGpuSourceLocationZonesReady() const { return m_data.gpuSourceLocationZonesReady; } + pair GetZonesForSourceLocation( int16_t srcloc ); + bool AreSourceLocationZonesReady() const; + bool IsCpuUsageReady() const { return m_data.ctxUsageReady; } const Vector& GetCpuUsage() const { return m_data.ctxUsage; } @@ -758,6 +686,7 @@ class Worker tracy_force_inline void ProcessMessageColorCallstack( const QueueMessageColor& ev ); tracy_force_inline void ProcessMessageLiteralColorCallstack( const QueueMessageColorLiteral& ev ); tracy_force_inline void ProcessMessageAppInfo( const QueueMessage& ev ); + tracy_force_inline void ProcessCpuNewContext(); tracy_force_inline void ProcessGpuNewContext( const QueueGpuNewContext& ev ); tracy_force_inline void ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev, bool serial ); tracy_force_inline void ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ); @@ -810,16 +739,16 @@ class Worker tracy_force_inline ZoneEvent* AllocZoneEvent(); tracy_force_inline void ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBeginLean& ev ); - tracy_force_inline void ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev, bool serial ); - tracy_force_inline void ProcessGpuZoneBeginAllocSrcLocImpl( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ); - tracy_force_inline void ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ); + tracy_force_inline void ProcessGpuZoneBeginImpl( ZoneEvent* zone, const QueueGpuZoneBegin& ev, bool serial ); + tracy_force_inline void ProcessGpuZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ); + tracy_force_inline void ProcessGpuZoneBeginImplCommon( ZoneEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ); tracy_force_inline void ProcessPlotDataImpl( uint64_t name, int64_t evTime, double val ); tracy_force_inline MemEvent* ProcessMemAllocImpl( MemData& memdata, const QueueMemAlloc& ev ); tracy_force_inline MemEvent* ProcessMemFreeImpl( MemData& memdata, const QueueMemFree& ev ); - tracy_force_inline void ProcessCallstackSampleImpl( const SampleData& sd, ThreadData& td ); - tracy_force_inline void ProcessCallstackSampleInsertSample( const SampleData& sd, ThreadData& td ); + tracy_force_inline void ProcessCallstackSampleImpl( const SampleData& sd, CPUThreadData& td ); + tracy_force_inline void ProcessCallstackSampleInsertSample( const SampleData& sd, CPUThreadData& td ); #ifndef TRACY_NO_STATISTICS - tracy_force_inline void ProcessCallstackSampleImplStats( const SampleData& sd, ThreadData& td ); + tracy_force_inline void ProcessCallstackSampleImplStats( const SampleData& sd, CPUThreadData& td ); #endif void ZoneStackFailure( uint64_t thread, const ZoneEvent* ev ); @@ -853,7 +782,7 @@ class Worker void InsertMessageData( MessageData* msg ); ThreadData* NoticeThreadReal( uint64_t thread ); - ThreadData* NewThread( uint64_t thread, bool fiber, int32_t groupHint ); + CPUThreadData* NewThread( uint64_t thread, bool fiber, int32_t groupHint ); tracy_force_inline ThreadData* NoticeThread( uint64_t thread ) { if( m_data.threadDataLast.first == thread ) return m_data.threadDataLast.second; @@ -868,36 +797,6 @@ class Worker tracy_force_inline ThreadData* GetCurrentThreadData(); -#ifndef TRACY_NO_STATISTICS - SourceLocationZones* GetSourceLocationZones( uint16_t srcloc ) - { - if( m_data.srclocZonesLast.first == srcloc ) return m_data.srclocZonesLast.second; - return GetSourceLocationZonesReal( srcloc ); - } - SourceLocationZones* GetSourceLocationZonesReal( uint16_t srcloc ); - - GpuSourceLocationZones* GetGpuSourceLocationZones( uint16_t srcloc ) - { - if( m_data.gpuZonesLast.first == srcloc ) return m_data.gpuZonesLast.second; - return GetGpuSourceLocationZonesReal( srcloc ); - } - GpuSourceLocationZones* GetGpuSourceLocationZonesReal( uint16_t srcloc ); -#else - uint64_t* GetSourceLocationZonesCnt( uint16_t srcloc ) - { - if( m_data.srclocCntLast.first == srcloc ) return m_data.srclocCntLast.second; - return GetSourceLocationZonesCntReal( srcloc ); - } - uint64_t* GetSourceLocationZonesCntReal( uint16_t srcloc ); - - uint64_t* GetGpuSourceLocationZonesCnt( uint16_t srcloc ) - { - if( m_data.gpuCntLast.first == srcloc ) return m_data.gpuCntLast.second; - return GetGpuSourceLocationZonesCntReal( srcloc ); - } - uint64_t* GetGpuSourceLocationZonesCntReal( uint16_t srcloc ); -#endif - tracy_force_inline void NewZone( ZoneEvent* zone ); void InsertLockEvent( LockMap& lockmap, LockEvent* lev, uint64_t thread, int64_t time ); @@ -947,8 +846,6 @@ class Worker void CacheSource( const StringRef& str, const StringIdx& image = StringIdx() ); void CacheSourceFromFile( const char* fn ); - tracy_force_inline Vector>& GetZoneChildrenMutable( int32_t idx ) { return m_data.zoneChildren[idx]; } - tracy_force_inline Vector>& GetGpuChildrenMutable( int32_t idx ) { return m_data.gpuChildren[idx]; } #ifndef TRACY_NO_STATISTICS tracy_force_inline Vector& GetGhostChildrenMutable( int32_t idx ) { return m_data.ghostChildren[idx]; } #endif @@ -962,37 +859,28 @@ class Worker tracy_force_inline int AddGhostZone( const VarArray& cs, Vector* vec, uint64_t t ); #endif - tracy_force_inline int64_t ReadTimeline( FileRead& f, ZoneEvent* zone, int64_t refTime, int32_t& childIdx ); - tracy_force_inline int64_t ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t refTime, int32_t& childIdx, uint32_t sz ); - tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ); - tracy_force_inline void ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz, bool hasQueryId ); + tracy_force_inline int64_t ReadTimeline( FileRead& f, ZoneEvent* zone, ZoneContext* ctx, int64_t refTime, int32_t& childIdx ); + tracy_force_inline int64_t ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, ZoneContext* ctx, int64_t refTime, int32_t& childIdx, uint32_t sz ); #ifndef TRACY_NO_STATISTICS - tracy_force_inline void ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread ); - tracy_force_inline void ReconstructZoneStatistics( GpuEvent& zone, uint16_t thread ); + tracy_force_inline void ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, ZoneContext& ctx, uint16_t thread ); #else tracy_force_inline void CountZoneStatistics( ZoneEvent* zone ); - tracy_force_inline void CountZoneStatistics( GpuEvent* zone ); #endif tracy_force_inline ZoneExtra& GetZoneExtraMutable( const ZoneEvent& ev ) { return m_data.zoneExtra[ev.extra]; } tracy_force_inline ZoneExtra& AllocZoneExtra( ZoneEvent& ev ); tracy_force_inline ZoneExtra& RequestZoneExtra( ZoneEvent& ev ); - int64_t GetZoneEndImpl( const ZoneEvent& ev ); - int64_t GetZoneEndImpl( const GpuEvent& ev ); + int64_t GetZoneEndImpl( const ZoneEvent& ev ) const; void UpdateMbps( int64_t td ); - int64_t ReadTimeline( FileRead& f, Vector>& vec, uint32_t size, int64_t refTime, int32_t& childIdx ); - void ReadTimeline( FileRead& f, Vector>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ); + int64_t ReadTimeline( FileRead& f, Vector>& vec, ZoneContext* ctx, uint32_t size, int64_t refTime, int32_t& childIdx ); tracy_force_inline void WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime ); - tracy_force_inline void WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime, int64_t& refGpuTime ); template void WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime ); - template - void WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime, int64_t& refGpuTime ); int64_t TscTime( int64_t tsc ) { return int64_t( ( tsc - m_data.baseTime ) * m_timerMul ); } int64_t TscTime( uint64_t tsc ) { return int64_t( ( tsc - m_data.baseTime ) * m_timerMul ); } @@ -1035,12 +923,12 @@ class Worker bool m_inconsistentSamples; bool m_allowStringModification = false; - short_ptr m_gpuCtxMap[256]; + short_ptr m_ctxMap[256]; uint32_t m_pendingCallstackId = 0; int16_t m_pendingSourceLocationPayload = 0; Vector m_sourceLocationQueue; unordered_flat_map m_sourceLocationShrink; - unordered_flat_map m_threadMap; + uint8_t m_defaultCtx { UINT8_MAX }; unordered_flat_map m_vsyncFrameMap; FrameImagePending m_pendingFrameImageData = {}; unordered_flat_map m_pendingSymbols; @@ -1091,9 +979,6 @@ class Worker size_t m_frameImageBufferSize = 0; TextureCompression m_texcomp; - uint64_t m_threadCtx = 0; - ThreadData* m_threadCtxData = nullptr; - int64_t m_refTimeThread = 0; int64_t m_refTimeSerial = 0; int64_t m_refTimeCtx = 0; int64_t m_refTimeGpu = 0;