Skip to content

Commit 9cce999

Browse files
gnurizen and claude committed
Move CUDA init_parcagpu to TestMain to fix singleton reuse
InitializeInjection in libparcagpucupti.so is a singleton — it only registers CUPTI callbacks on the first call. When TestCUDAEndToEndSingleShot cleaned up and TestCUDAEndToEndMultiProbe called init_parcagpu again, the singleton returned 1 without re-registering, leaving callback pointers NULL. Load the library once in TestMain and clean up once at exit.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3abfe7d commit 9cce999

1 file changed

Lines changed: 26 additions & 16 deletions

File tree

test/cudaverify/cuda_verifier_test.go

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,32 @@ import (
2323

2424
var soPath = flag.String("so-path", "/libparcagpucupti.so", "path to libparcagpucupti.so")
2525

26+
func TestMain(m *testing.M) {
27+
flag.Parse()
28+
29+
if os.Getuid() == 0 {
30+
rc := cInitParcaGPU(*soPath)
31+
if rc != 0 {
32+
os.Exit(1)
33+
}
34+
}
35+
36+
code := m.Run()
37+
38+
if os.Getuid() == 0 {
39+
cCleanupParcaGPU()
40+
}
41+
42+
os.Exit(code)
43+
}
44+
2645
// runEndToEnd exercises the full process-manager driven GPU probe attachment flow:
2746
//
2847
// 1. Start the full tracer pipeline (PID event processor, map monitors, profiling).
29-
// 2. ForceProcessPID to trigger initial process sync — this causes the tracer to
30-
// read our /proc/self/maps, discover libc, and attach the dlopen uprobe via rtld.
31-
// 3. Wait until the dlopen uprobe is confirmed attached (metric increments).
32-
// 4. dlopen libparcagpu — the dlopen uprobe fires, triggering a re-sync that
33-
// discovers libparcagpu and automatically attaches the GPU USDT probes.
34-
// 5. Verify GPU interpreter instance is attached, then simulate kernel launches
48+
// 2. ForceProcessPID to trigger process sync — the tracer reads /proc/self/maps,
49+
// discovers libc and libparcagpucupti.so (loaded in TestMain), and attaches
50+
// the GPU USDT probes.
51+
// 3. Verify GPU interpreter instance is attached, then simulate kernel launches
3552
// and check that timing events arrive on the perf buffer.
3653
func runEndToEnd(t *testing.T, multiProbe bool) {
3754
t.Helper()
@@ -70,23 +87,16 @@ func runEndToEnd(t *testing.T, multiProbe bool) {
7087
return false
7188
}, 30*time.Second, 200*time.Millisecond, "process manager never synced our PID")
7289

73-
// Set up perf reader on the cuda_timing_events map BEFORE the dlopen so we
74-
// don't miss any events.
90+
// Set up perf reader on the cuda_timing_events map before simulation.
7591
timingMap := trc.GetEbpfMaps()["cuda_timing_events"]
7692
require.NotNil(t, timingMap, "cuda_timing_events map not found")
7793

7894
reader, err := perf.NewReader(timingMap, 1024*1024)
7995
require.NoError(t, err, "perf.NewReader failed")
8096
defer reader.Close()
8197

82-
// dlopen libparcagpu — this fires the dlopen uprobe, which causes a PID
83-
// re-sync. The process manager will discover the newly mapped .so, the GPU
84-
// loader will find its USDT probes, and Attach will hook them up.
85-
rc := cInitParcaGPU(*soPath)
86-
require.Equal(t, 0, rc, "init_parcagpu (dlopen) failed")
87-
defer cCleanupParcaGPU()
88-
89-
// Speed up the re-sync after dlopen.
98+
// libparcagpucupti.so was loaded in TestMain — ForceProcessPID will
99+
// discover it from /proc/self/maps and attach the GPU USDT probes.
90100
trc.ForceProcessPID(pid)
91101

92102
// Wait until the GPU interpreter instance appears, confirming the USDT

0 commit comments

Comments
 (0)