@@ -34,11 +34,12 @@ var (
3434)
3535
3636// SymbolizedCudaTrace holds a symbolized trace awaiting GPU timing information.
37- // The CPU frames are already symbolized; only the CUDA kernel frame (frame[0])
37+ // The CPU frames are already symbolized; only the CUDA kernel frame
3838// needs the kernel name from the timing event.
3939type SymbolizedCudaTrace struct {
4040 Trace * libpf.Trace // already-symbolized trace; the CUDA frame's function name is filled in by prepTrace
4141 Meta * samples.TraceEventMeta // event metadata; Meta.PID is used by AddTrace to look up the per-process fixer
42+ CUDAFrameIdx int // index of CUDAKernelFrame in Trace.Frames
4243 CorrelationID uint32 // presumably the key that pairs this trace with its timing events — confirm against the fixer maps
4344 CBID int32 // NOTE(review): looks like a CUPTI callback ID — not established by this hunk, confirm
4445}
@@ -60,9 +61,9 @@ type CudaTraceOutput struct {
6061// that launched the kernel."
6162type gpuTraceFixer struct {
6263 mu sync.Mutex // NOTE(review): presumably guards the maps and maxCorrelationId below — locking is not visible in this hunk, confirm
63- timesAwaitingTraces map [uint32 ][]CuptiTimingEvent // keyed by correlation ID
64- tracesAwaitingTimes map [uint32 ]* SymbolizedCudaTrace // keyed by correlation ID
65- maxCorrelationId uint32 // track highest ID for threshold-based clearing
64+ timesAwaitingTraces map [uint32 ][]CuptiTimingEvent // keyed by correlation ID
65+ tracesAwaitingTimes map [uint32 ]* SymbolizedCudaTrace // keyed by correlation ID
66+ maxCorrelationId uint32 // track highest ID for threshold-based clearing
6667}
6768
6869type data struct {
@@ -343,7 +344,7 @@ func (f *gpuTraceFixer) prepTrace(st *SymbolizedCudaTrace, ev *CuptiTimingEvent)
343344 out .Trace .CustomLabels ["cuda_id" ] = strconv .FormatUint (uint64 (ev .Id ), 10 )
344345 }
345346
346- // Extract kernel name from timing event, demangle, and update frame[0]
347+ // Extract kernel name from timing event, demangle, and update the CUDA frame.
347348 nameBytes := ev .KernelName [:]
348349 if idx := bytes .IndexByte (nameBytes , 0 ); idx >= 0 {
349350 nameBytes = nameBytes [:idx ]
@@ -356,11 +357,10 @@ func (f *gpuTraceFixer) prepTrace(st *SymbolizedCudaTrace, ev *CuptiTimingEvent)
356357 funcName = libpf .Intern (demStr )
357358 }
358359
359- currentFrame := out .Trace .Frames [0 ].Value ()
360- out .Trace .Frames [0 ] = unique .Make (libpf.Frame {
361- Type : currentFrame .Type ,
362- AddressOrLineno : currentFrame .AddressOrLineno ,
363- FunctionName : funcName ,
360+ fi := st .CUDAFrameIdx
361+ out .Trace .Frames [fi ] = unique .Make (libpf.Frame {
362+ Type : out .Trace .Frames [fi ].Value ().Type ,
363+ FunctionName : funcName ,
364364 })
365365 }
366366
@@ -375,7 +375,7 @@ func AddTrace(st *SymbolizedCudaTrace) []CudaTraceOutput {
375375 pid := st .Meta .PID
376376 value , ok := gpuFixers .Load (pid )
377377 if ! ok {
378- log .Warnf ("no GPU fixer found for PID %d" , pid )
378+ log .Warnf ("no GPU fixer found for PID %d in AddTrace " , pid )
379379 return nil
380380 }
381381 fixer := value .(* gpuTraceFixer )
@@ -387,7 +387,7 @@ func addTimeSingle(ev *CuptiTimingEvent) (CudaTraceOutput, bool) {
387387 pid := libpf .PID (ev .Pid )
388388 value , ok := gpuFixers .Load (pid )
389389 if ! ok {
390- log .Warnf ("no GPU fixer found for PID %d" , pid )
390+ log .Warnf ("no GPU fixer found for PID %d in AddTime " , pid )
391391 return CudaTraceOutput {}, false
392392 }
393393 fixer := value .(* gpuTraceFixer )
@@ -409,6 +409,7 @@ func AddTimes(events []CuptiTimingEvent) []CudaTraceOutput {
409409 pid := libpf .PID (events [0 ].Pid )
410410 value , ok := gpuFixers .Load (pid )
411411 if ! ok {
412+ log .Warnf ("no GPU fixer found for PID %d in AddTimes" , pid )
412413 return nil
413414 }
414415 fixer := value .(* gpuTraceFixer )
@@ -448,6 +449,7 @@ func MaybeClearAll() []metrics.Metric {
448449 totalTraces += stats .tracesLen
449450 totalTimesCleared += stats .timesCleared
450451 totalTracesCleared += stats .tracesCleared
452+
451453 return true
452454 })
453455
0 commit comments