We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c71dfa4, commit c4d1343. Copy full SHA for c4d1343
1 file changed
src/cupti.cpp
@@ -528,8 +528,11 @@ class CuptiProfiler : public proton::Singleton<CuptiProfiler> {
528
CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_ptsz_v10000);
529
}
530
531
- // Rate limit probes using token bucket (skip for graph launches)
532
- if (!isGraphLaunch) {
+ // Rate limit probes using token bucket. Skip rate limiting for graph
+ // launches (they share one correlation ID across many kernels) and when
533
+ // PC sampling is active (every kernel needs its correlation callback so
534
+ // PC samples can be matched with CPU stacks on the agent side).
535
+ if (!isGraphLaunch && !g_pcSamplingState.active) {
536
if (!callbackLimiter.tryAcquire()) {
537
DEBUG_PRINTF(
538
"[PARCAGPU] Rate limited: skipping probe for correlationId=%u\n",
0 commit comments