Skip to content

Commit 59dd22b

Browse files
committed
Add tests for GPU sample handling
1 parent b28a9e5 commit 59dd22b

3 files changed

Lines changed: 389 additions & 0 deletions

File tree

interpreter/gpu/cuda_test.go

Lines changed: 349 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,17 @@ package gpu_test
55

66
import (
77
"testing"
8+
"unique"
9+
"unsafe"
810

11+
"github.com/stretchr/testify/assert"
912
"github.com/stretchr/testify/require"
13+
"github.com/zeebo/xxh3"
14+
15+
"go.opentelemetry.io/ebpf-profiler/host"
1016
"go.opentelemetry.io/ebpf-profiler/interpreter/gpu"
17+
"go.opentelemetry.io/ebpf-profiler/libpf"
18+
"go.opentelemetry.io/ebpf-profiler/reporter/samples"
1119
"go.opentelemetry.io/ebpf-profiler/support"
1220
)
1321

@@ -42,3 +50,344 @@ func TestProgramNamesExist(t *testing.T) {
4250
t.Logf("Found program %q", gpu.USDTProgCudaProbe)
4351
})
4452
}
53+
54+
// computeTraceHash replicates the hash logic from tracer.loadBpfTrace:
55+
// zero per-sample fields, then hash the raw bytes.
56+
func computeTraceHash(tr *support.Trace) host.TraceHash {
57+
// Work on a copy so we don't mutate the caller's data.
58+
clone := *tr
59+
clone.ZeroPerSampleFields()
60+
raw := unsafe.Slice((*byte)(unsafe.Pointer(&clone)), unsafe.Sizeof(clone))
61+
return host.TraceHash(xxh3.Hash128(raw).Lo)
62+
}
63+
64+
// makeCUDATrace builds a support.Trace with one CUDA kernel frame (at position 0)
65+
// followed by nativFrames native frames. The CUDA frame encodes the given
66+
// correlationID and cbid.
67+
func makeCUDATrace(pid uint32, correlationID uint32, cbid int32,
68+
nativeFrames []support.Frame) support.Trace {
69+
tr := support.Trace{
70+
Pid: pid,
71+
Tid: pid,
72+
Origin: support.TraceOriginCuda,
73+
Kernel_stack_id: -1, // no kernel stack
74+
}
75+
76+
// CUDA kernel frame first (matches BPF collect_trace ordering).
77+
cudaID := uint64(correlationID) | (uint64(uint32(cbid)) << 32)
78+
tr.Frames[0] = support.Frame{
79+
Kind: support.FrameMarkerCUDAKernel,
80+
Addr_or_line: cudaID,
81+
}
82+
tr.Stack_len = 1
83+
84+
for i, f := range nativeFrames {
85+
tr.Frames[1+i] = f
86+
tr.Stack_len++
87+
}
88+
89+
return tr
90+
}
91+
92+
func TestCUDATraceHashStability(t *testing.T) {
93+
// Two launches from the same call site (identical native frames)
94+
// with different correlation IDs must produce the same hash.
95+
nativeFrames := []support.Frame{
96+
{File_id: 0xaaaa, Addr_or_line: 0x1000, Kind: 8}, // native
97+
{File_id: 0xaaaa, Addr_or_line: 0x2000, Kind: 8}, // native
98+
{File_id: 0xbbbb, Addr_or_line: 0x3000, Kind: 8}, // native
99+
}
100+
101+
tr1 := makeCUDATrace(100, 42, 1, nativeFrames)
102+
tr2 := makeCUDATrace(100, 999, 1, nativeFrames)
103+
104+
hash1 := computeTraceHash(&tr1)
105+
hash2 := computeTraceHash(&tr2)
106+
assert.Equal(t, hash1, hash2,
107+
"same call site with different correlation IDs should produce identical hashes")
108+
109+
// Different CBID (different API call type) with same native stack should
110+
// also produce the same hash since cbid is part of addr_or_line.
111+
tr3 := makeCUDATrace(100, 42, 7, nativeFrames)
112+
hash3 := computeTraceHash(&tr3)
113+
assert.Equal(t, hash1, hash3,
114+
"same call site with different CBIDs should produce identical hashes")
115+
}
116+
117+
func TestCUDATraceHashDiffers(t *testing.T) {
118+
framesA := []support.Frame{
119+
{File_id: 0xaaaa, Addr_or_line: 0x1000, Kind: 8},
120+
}
121+
framesB := []support.Frame{
122+
{File_id: 0xaaaa, Addr_or_line: 0x2000, Kind: 8}, // different addr
123+
}
124+
125+
trA := makeCUDATrace(100, 42, 1, framesA)
126+
trB := makeCUDATrace(100, 42, 1, framesB)
127+
128+
hashA := computeTraceHash(&trA)
129+
hashB := computeTraceHash(&trB)
130+
assert.NotEqual(t, hashA, hashB,
131+
"different native stacks should produce different hashes")
132+
}
133+
134+
func TestCUDATraceHashExcludesPerSampleFields(t *testing.T) {
135+
frames := []support.Frame{
136+
{File_id: 0xaaaa, Addr_or_line: 0x1000, Kind: 8},
137+
}
138+
139+
tr1 := makeCUDATrace(100, 42, 1, frames)
140+
tr2 := makeCUDATrace(100, 42, 1, frames)
141+
142+
// Vary all the per-sample fields that should be excluded.
143+
tr2.Ktime = 99999
144+
tr2.Origin = support.TraceOriginOffCPU
145+
tr2.Offtime = 12345
146+
tr2.Comm = [16]byte{'d', 'i', 'f', 'f', 'e', 'r', 'e', 'n', 't'}
147+
148+
hash1 := computeTraceHash(&tr1)
149+
hash2 := computeTraceHash(&tr2)
150+
assert.Equal(t, hash1, hash2,
151+
"per-sample fields (ktime, origin, offtime, comm) must not affect hash")
152+
}
153+
154+
func TestNonCUDATraceHashIncludesAddrOrLine(t *testing.T) {
155+
// For non-CUDA frames, addr_or_line MUST be included in the hash.
156+
makeNative := func(addr uint64) support.Trace {
157+
tr := support.Trace{
158+
Pid: 100,
159+
Tid: 100,
160+
Origin: support.TraceOriginSampling,
161+
Stack_len: 1,
162+
Kernel_stack_id: -1,
163+
}
164+
tr.Frames[0] = support.Frame{
165+
File_id: 0xaaaa,
166+
Addr_or_line: addr,
167+
Kind: 8, // native
168+
}
169+
return tr
170+
}
171+
172+
tr1 := makeNative(0x1000)
173+
tr2 := makeNative(0x2000)
174+
175+
hash1 := computeTraceHash(&tr1)
176+
hash2 := computeTraceHash(&tr2)
177+
assert.NotEqual(t, hash1, hash2,
178+
"non-CUDA traces with different addresses must have different hashes")
179+
}
180+
181+
// makeSymbolizedTrace builds a libpf.Trace that looks like what ConvertTrace
182+
// produces for a CUDA trace: frames before cudaFrameIdx are native, then the
183+
// CUDAKernelFrame, then more native frames.
184+
func makeSymbolizedTrace(cudaFrameIdx int, nativeFrameCount int) *libpf.Trace {
185+
trace := &libpf.Trace{}
186+
for i := range cudaFrameIdx + 1 + nativeFrameCount {
187+
if i == cudaFrameIdx {
188+
trace.Frames = append(trace.Frames, unique.Make(libpf.Frame{
189+
Type: libpf.CUDAKernelFrame,
190+
}))
191+
} else {
192+
trace.Frames = append(trace.Frames, unique.Make(libpf.Frame{
193+
Type: libpf.NativeFrame,
194+
AddressOrLineno: libpf.AddressOrLineno(0x1000 * (i + 1)),
195+
FileID: libpf.NewFileID(uint64(i+1), 0),
196+
}))
197+
}
198+
}
199+
return trace
200+
}
201+
202+
func TestAddTraceAndTimes(t *testing.T) {
203+
const pid = libpf.PID(500)
204+
gpu.RegisterTestFixer(pid)
205+
t.Cleanup(func() { gpu.UnregisterTestFixer(pid) })
206+
207+
// Simulate: trace arrives first, timing arrives second.
208+
trace := makeSymbolizedTrace(0, 2) // CUDA frame at index 0
209+
meta := &samples.TraceEventMeta{PID: pid}
210+
211+
st := &gpu.SymbolizedCudaTrace{
212+
Trace: trace,
213+
Meta: meta,
214+
CUDAFrameIdx: 0,
215+
CorrelationID: 100,
216+
CBID: 1,
217+
}
218+
219+
// AddTrace with no pending timing -> stored, no outputs.
220+
outputs := gpu.AddTrace(st)
221+
assert.Empty(t, outputs, "no timing yet, should produce no outputs")
222+
223+
// Now timing arrives.
224+
kernelName := [256]byte{}
225+
copy(kernelName[:], "_Z9myKernelPfS_i")
226+
227+
events := []gpu.CuptiTimingEvent{{
228+
Pid: uint32(pid),
229+
Id: 100,
230+
Start: 1000,
231+
End: 2000,
232+
Dev: 0,
233+
Stream: 7,
234+
KernelName: kernelName,
235+
}}
236+
237+
outputs = gpu.AddTimes(events)
238+
require.Len(t, outputs, 1, "timing matched, should produce one output")
239+
240+
out := outputs[0]
241+
assert.Equal(t, int64(1000), out.Meta.OffTime, "OffTime should be End-Start")
242+
assert.Equal(t, "0", out.Trace.CustomLabels["cuda_device"])
243+
assert.Equal(t, "7", out.Trace.CustomLabels["cuda_stream"])
244+
245+
// Verify the CUDA frame got the demangled kernel name and zeroed AddressOrLineno.
246+
cudaFrame := out.Trace.Frames[0].Value()
247+
assert.Equal(t, libpf.CUDAKernelFrame, cudaFrame.Type)
248+
assert.Equal(t, libpf.AddressOrLineno(0), cudaFrame.AddressOrLineno,
249+
"correlation ID should be zeroed in output")
250+
assert.Contains(t, cudaFrame.FunctionName.String(), "myKernel",
251+
"kernel name should be demangled")
252+
}
253+
254+
func TestAddTimeThenTrace(t *testing.T) {
255+
const pid = libpf.PID(501)
256+
gpu.RegisterTestFixer(pid)
257+
t.Cleanup(func() { gpu.UnregisterTestFixer(pid) })
258+
259+
// Simulate: timing arrives first, trace arrives second.
260+
kernelName := [256]byte{}
261+
copy(kernelName[:], "_Z6squarePfS_")
262+
263+
events := []gpu.CuptiTimingEvent{{
264+
Pid: uint32(pid),
265+
Id: 200,
266+
Start: 5000,
267+
End: 8000,
268+
Dev: 1,
269+
KernelName: kernelName,
270+
}}
271+
272+
// Timing arrives first -> stored, no outputs.
273+
outputs := gpu.AddTimes(events)
274+
assert.Empty(t, outputs)
275+
276+
// Now trace arrives and matches.
277+
trace := makeSymbolizedTrace(0, 1)
278+
meta := &samples.TraceEventMeta{PID: pid}
279+
280+
st := &gpu.SymbolizedCudaTrace{
281+
Trace: trace,
282+
Meta: meta,
283+
CUDAFrameIdx: 0,
284+
CorrelationID: 200,
285+
CBID: 1,
286+
}
287+
288+
outputs = gpu.AddTrace(st)
289+
require.Len(t, outputs, 1)
290+
291+
out := outputs[0]
292+
assert.Equal(t, int64(3000), out.Meta.OffTime)
293+
assert.Equal(t, "1", out.Trace.CustomLabels["cuda_device"])
294+
295+
cudaFrame := out.Trace.Frames[0].Value()
296+
assert.Contains(t, cudaFrame.FunctionName.String(), "square")
297+
assert.Equal(t, libpf.AddressOrLineno(0), cudaFrame.AddressOrLineno)
298+
}
299+
300+
func TestCachedTemplateWithDifferentCorrelationIDs(t *testing.T) {
301+
const pid = libpf.PID(502)
302+
gpu.RegisterTestFixer(pid)
303+
t.Cleanup(func() { gpu.UnregisterTestFixer(pid) })
304+
305+
// Simulate two launches from the same call site (same template) with
306+
// different correlation IDs and different kernel names from timing.
307+
// This is the scenario where the cache provides the template.
308+
template := makeSymbolizedTrace(0, 2)
309+
310+
for _, tc := range []struct {
311+
corrID uint32
312+
kernelName string
313+
offTime int64
314+
}{
315+
{corrID: 300, kernelName: "_Z7kernelAv", offTime: 100},
316+
{corrID: 301, kernelName: "_Z7kernelBv", offTime: 200},
317+
} {
318+
meta := &samples.TraceEventMeta{PID: pid}
319+
320+
st := &gpu.SymbolizedCudaTrace{
321+
Trace: template,
322+
Meta: meta,
323+
CUDAFrameIdx: 0,
324+
CorrelationID: tc.corrID,
325+
CBID: 1,
326+
}
327+
gpu.AddTrace(st)
328+
329+
kn := [256]byte{}
330+
copy(kn[:], tc.kernelName)
331+
outputs := gpu.AddTimes([]gpu.CuptiTimingEvent{{
332+
Pid: uint32(pid),
333+
Id: tc.corrID,
334+
Start: 0,
335+
End: uint64(tc.offTime),
336+
KernelName: kn,
337+
}})
338+
require.Len(t, outputs, 1, "corrID %d should produce one output", tc.corrID)
339+
340+
out := outputs[0]
341+
assert.Equal(t, tc.offTime, out.Meta.OffTime)
342+
343+
cudaFrame := out.Trace.Frames[0].Value()
344+
assert.Equal(t, libpf.AddressOrLineno(0), cudaFrame.AddressOrLineno,
345+
"correlation ID must not leak into output")
346+
// Verify each launch got its own kernel name.
347+
assert.Contains(t, cudaFrame.FunctionName.String(), tc.kernelName[3:10],
348+
"each launch should get its own kernel name")
349+
}
350+
}
351+
352+
func TestCUDAFrameIdxNonZero(t *testing.T) {
353+
const pid = libpf.PID(503)
354+
gpu.RegisterTestFixer(pid)
355+
t.Cleanup(func() { gpu.UnregisterTestFixer(pid) })
356+
357+
// CUDA frame at index 2 (after two kernel/native frames).
358+
trace := makeSymbolizedTrace(2, 2) // [native, native, CUDA, native, native]
359+
meta := &samples.TraceEventMeta{PID: pid}
360+
361+
kernelName := [256]byte{}
362+
copy(kernelName[:], "_Z4testv")
363+
364+
st := &gpu.SymbolizedCudaTrace{
365+
Trace: trace,
366+
Meta: meta,
367+
CUDAFrameIdx: 2,
368+
CorrelationID: 400,
369+
CBID: 1,
370+
}
371+
gpu.AddTrace(st)
372+
373+
outputs := gpu.AddTimes([]gpu.CuptiTimingEvent{{
374+
Pid: uint32(pid),
375+
Id: 400,
376+
Start: 0,
377+
End: 500,
378+
KernelName: kernelName,
379+
}})
380+
require.Len(t, outputs, 1)
381+
382+
// The CUDA frame at index 2 should have the kernel name.
383+
cudaFrame := outputs[0].Trace.Frames[2].Value()
384+
assert.Equal(t, libpf.CUDAKernelFrame, cudaFrame.Type)
385+
assert.Contains(t, cudaFrame.FunctionName.String(), "test")
386+
387+
// The non-CUDA frames should be untouched.
388+
for _, idx := range []int{0, 1, 3, 4} {
389+
f := outputs[0].Trace.Frames[idx].Value()
390+
assert.Equal(t, libpf.NativeFrame, f.Type,
391+
"frame %d should remain native", idx)
392+
}
393+
}

interpreter/gpu/export_test.go

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package gpu
5+
6+
import "go.opentelemetry.io/ebpf-profiler/libpf"
7+
8+
// RegisterTestFixer creates and registers a gpuTraceFixer for the given PID.
9+
// For use in tests only.
10+
func RegisterTestFixer(pid libpf.PID) {
11+
fixer := &gpuTraceFixer{
12+
timesAwaitingTraces: make(map[uint32][]CuptiTimingEvent),
13+
tracesAwaitingTimes: make(map[uint32]*SymbolizedCudaTrace),
14+
}
15+
gpuFixers.Store(pid, fixer)
16+
}
17+
18+
// UnregisterTestFixer removes the fixer for the given PID.
19+
func UnregisterTestFixer(pid libpf.PID) {
20+
gpuFixers.Delete(pid)
21+
}

0 commit comments

Comments
 (0)