Skip to content

Commit 4a2ec8e

Browse files
committed
Fix infinite recursion and make sure to get all samples
1 parent 54ad034 commit 4a2ec8e

2 files changed

Lines changed: 34 additions & 17 deletions

File tree

src/cupti.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@ static double threadRandom(PCSamplingState &s) {
8989
void init_debug() {
9090
static bool initialized = false;
9191
if (!initialized) {
92+
initialized = true;
9293
debug_enabled = getenv("PARCAGPU_DEBUG") != nullptr;
9394
const char *rateEnv = getenv("PARCAGPU_RATE_LIMIT");
9495
if (rateEnv != nullptr) {

src/pc_sampling.cpp

Lines changed: 33 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -270,6 +270,7 @@ CUpti_PCSamplingData allocPCSamplingData(size_t collectNumPCs,
270270
static_cast<CUpti_PCSamplingPCData *>(
271271
std::calloc(collectNumPCs, sizeof(CUpti_PCSamplingPCData)))};
272272
for (size_t i = 0; i < collectNumPCs; ++i) {
273+
pcSamplingData.pPcData[i].size = sizeof(CUpti_PCSamplingPCData);
273274
pcSamplingData.pPcData[i].stallReason =
274275
static_cast<CUpti_PCSamplingStallReason *>(std::calloc(
275276
numValidStallReasons, sizeof(CUpti_PCSamplingStallReason)));
@@ -647,16 +648,22 @@ void PCSampling::collectData(CUcontext context) {
647648
contextId, configureData->pcSamplingData.totalNumPcs,
648649
configureData->pcSamplingData.remainingNumPcs);
649650

650-
// Use the separate output buffer for getData — the configured
651-
// pcSamplingData buffer is owned by CUPTI.
652-
bool ok = getPCSamplingData(context, &configureData->outputData);
653-
DEBUG_PRINTF("getData: ok=%d output total=%zu remaining=%zu "
654-
"cfg total=%zu remaining=%zu\n",
655-
ok, configureData->outputData.totalNumPcs,
656-
configureData->outputData.remainingNumPcs,
657-
configureData->pcSamplingData.totalNumPcs,
658-
configureData->pcSamplingData.remainingNumPcs);
659-
processPCSamplingData(configureData);
651+
// Drain all available PCs in a loop. Each getData call returns at most
652+
// DataBufferPCCount (1024) PCs; a single sampling window can produce
653+
// tens of thousands. Failing to drain leaves data in CUPTI's internal
654+
// buffers, which eventually causes CUPTI_ERROR_OUT_OF_MEMORY (error 8).
655+
do {
656+
bool ok = getPCSamplingData(context, &configureData->outputData);
657+
DEBUG_PRINTF("getData: ok=%d output total=%zu remaining=%zu "
658+
"cfg total=%zu remaining=%zu\n",
659+
ok, configureData->outputData.totalNumPcs,
660+
configureData->outputData.remainingNumPcs,
661+
configureData->pcSamplingData.totalNumPcs,
662+
configureData->pcSamplingData.remainingNumPcs);
663+
if (!ok)
664+
break;
665+
processPCSamplingData(configureData);
666+
} while (configureData->outputData.remainingNumPcs > 0);
660667
}
661668

662669
void PCSampling::collectAllData() {
@@ -671,7 +678,14 @@ void PCSampling::collectAllData() {
671678
}
672679
auto *configureData = &result->get();
673680
DEBUG_PRINTF("Draining PC sampling data for context %u\n", contextId);
674-
processPCSamplingData(configureData);
681+
// Fetch and drain all pending data from CUPTI.
682+
do {
683+
bool ok = getPCSamplingData(configureData->context,
684+
&configureData->outputData);
685+
if (!ok)
686+
break;
687+
processPCSamplingData(configureData);
688+
} while (configureData->outputData.remainingNumPcs > 0);
675689
}
676690
}
677691

@@ -706,14 +720,16 @@ void PCSampling::finalize(CUcontext context) {
706720
}
707721
}
708722

709-
// Drain remaining data before disabling
723+
// Drain all remaining PC data before disabling.
710724
auto *configureData = getConfigureData(contextId);
711-
processPCSamplingData(configureData);
712-
713-
// After disable, CUPTI may fill remaining records — drain once more
714-
if (configureData->pcSamplingData.totalNumPcs > 0) {
725+
do {
726+
bool ok = getPCSamplingData(context, &configureData->outputData);
727+
if (!ok)
728+
break;
715729
processPCSamplingData(configureData);
716-
}
730+
} while (configureData->outputData.remainingNumPcs > 0);
731+
732+
disablePCSampling(context);
717733

718734
contextIdToConfigureData.erase(contextId);
719735
contextInitialized.erase(contextId);

0 commit comments

Comments
 (0)