Skip to content

Commit 67135e7

Browse files
committed
Combine python and native unwinder into single loop
Python programs, especially PyTorch programs, can exhaust the tail call limit by switching from the Python to the native unwinder more than 29 times. This happens because of eval/delegation patterns where one Python frame is decorated with a couple of native frames. In order to unwind these stacks successfully, fold the native unwinder into the Python unwinder so that at each frame either a Python or a native frame can be unwound. Replace the separate walk_python_stack inner loop and outer transition loop with a single switch-in-loop structure using the step_python and step_native helper functions. This reduces tail call usage from one per batch to one each time the loop budget is exhausted (PYTHON_NATIVE_LOOP_ITERS=9 iterations). Move the native unwinder map externs (exe_id_to_*_stack_deltas, stack_delta_page_to_info, unwind_info_array) out of the TESTING_COREDUMP guard in extmaps.h so that python_tracer.ebpf.c can include native_stack_trace.h. - PYTHON_NATIVE_LOOP_ITERS=9 was chosen to pass the BPF verifier on 5.4 kernels (ITERS=10 times out the verifier at >300s) - On a failed PyCodeObject read, push the frame with the code object address so the agent can try to read it via /proc/pid/mem
1 parent 0b3af68 commit 67135e7

4 files changed

Lines changed: 93 additions & 52 deletions

File tree

support/ebpf/extmaps.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,6 @@ extern bpf_map_def trace_events;
2121
extern bpf_map_def go_labels_procs;
2222
extern bpf_map_def cl_procs;
2323
extern bpf_map_def v8_procs;
24-
25-
#if defined(TESTING_COREDUMP)
26-
27-
// References to maps in alphabetical order that
28-
// are needed only for testing.
29-
30-
extern bpf_map_def apm_int_procs;
3124
extern bpf_map_def exe_id_to_8_stack_deltas;
3225
extern bpf_map_def exe_id_to_9_stack_deltas;
3326
extern bpf_map_def exe_id_to_10_stack_deltas;
@@ -44,14 +37,21 @@ extern bpf_map_def exe_id_to_20_stack_deltas;
4437
extern bpf_map_def exe_id_to_21_stack_deltas;
4538
extern bpf_map_def exe_id_to_22_stack_deltas;
4639
extern bpf_map_def exe_id_to_23_stack_deltas;
40+
extern bpf_map_def stack_delta_page_to_info;
41+
extern bpf_map_def unwind_info_array;
42+
43+
#if defined(TESTING_COREDUMP)
44+
45+
// References to maps in alphabetical order that
46+
// are needed only for testing.
47+
48+
extern bpf_map_def apm_int_procs;
4749
extern bpf_map_def hotspot_procs;
4850
extern bpf_map_def dotnet_procs;
4951
extern bpf_map_def perl_procs;
5052
extern bpf_map_def php_procs;
5153
extern bpf_map_def py_procs;
5254
extern bpf_map_def ruby_procs;
53-
extern bpf_map_def stack_delta_page_to_info;
54-
extern bpf_map_def unwind_info_array;
5555
extern bpf_map_def luajit_procs;
5656

5757
#endif // TESTING_COREDUMP

support/ebpf/python_tracer.ebpf.c

Lines changed: 84 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,11 @@
22

33
#include "bpfdefs.h"
44
#include "errors.h"
5+
#include "stackdeltatypes.h"
56
#include "tracemgmt.h"
67
#include "tsd.h"
78
#include "types.h"
89

9-
// The number of Python frames to unwind per frame-unwinding eBPF program. If
10-
// we start running out of instructions in the walk_python_stack program, one
11-
// option is to adjust this number downwards.
12-
#define FRAMES_PER_WALK_PYTHON_STACK 12
13-
1410
// Forward declaration to avoid warnings like
1511
// "declaration of 'struct pt_regs' will not be visible outside of this function [-Wvisibility]".
1612
struct pt_regs;
@@ -127,8 +123,10 @@ static EBPF_INLINE ErrorCode process_python_frame(
127123
}
128124

129125
// Read PyCodeObject
130-
if (bpf_probe_read_user(pss->code, sizeof(pss->code), py_codeobject)) {
131-
DEBUG_PRINT("Failed to read PyCodeObject at 0x%lx", (unsigned long)(py_codeobject));
126+
long pycode_err = bpf_probe_read_user(pss->code, sizeof(pss->code), py_codeobject);
127+
if (pycode_err) {
128+
DEBUG_PRINT(
129+
"Failed to read PyCodeObject at 0x%lx err=%ld", (unsigned long)(py_codeobject), pycode_err);
132130
increment_metric(metricID_UnwindPythonErrBadCodeObjectArgCountAddr);
133131
// Push the frame with the code object address so the agent can try to
134132
// read it via /proc/pid/mem (which supports page faults unlike BPF).
@@ -160,40 +158,6 @@ static EBPF_INLINE ErrorCode process_python_frame(
160158
return ERR_OK;
161159
}
162160

163-
static EBPF_INLINE ErrorCode
164-
walk_python_stack(PerCPURecord *record, const PyProcInfo *pyinfo, int *unwinder)
165-
{
166-
void *py_frame = record->pythonUnwindState.py_frame;
167-
ErrorCode error = ERR_OK;
168-
*unwinder = PROG_UNWIND_STOP;
169-
170-
UNROLL for (u32 i = 0; i < FRAMES_PER_WALK_PYTHON_STACK; ++i)
171-
{
172-
bool continue_with_next;
173-
error = process_python_frame(record, pyinfo, &py_frame, &continue_with_next);
174-
if (error) {
175-
goto stop;
176-
}
177-
if (continue_with_next) {
178-
*unwinder = get_next_unwinder_after_interpreter();
179-
goto stop;
180-
}
181-
if (!py_frame) {
182-
goto stop;
183-
}
184-
}
185-
186-
*unwinder = PROG_UNWIND_PYTHON;
187-
188-
stop:
189-
// Set up the state for the next invocation of this unwinding program.
190-
if (error || !py_frame) {
191-
unwinder_mark_done(record, PROG_UNWIND_PYTHON);
192-
}
193-
record->pythonUnwindState.py_frame = py_frame;
194-
return error;
195-
}
196-
197161
// get_PyThreadState retrieves the PyThreadState* for the current thread.
198162
//
199163
// Python sets the thread_state using pthread_setspecific with the key
@@ -281,6 +245,64 @@ static EBPF_INLINE ErrorCode get_PyFrame(const PyProcInfo *pyinfo, void **frame)
281245
return ERR_OK;
282246
}
283247

248+
#include "native_stack_trace.h"
249+
250+
// Number of loop iterations in unwind_python. Each iteration handles either
251+
// one Python frame or one native frame depending on the current unwinder state.
252+
// This bounds the BPF verifier instruction count.
253+
#define PYTHON_NATIVE_LOOP_ITERS 9
254+
255+
// step_python processes one Python frame and updates *unwinder to indicate
256+
// what should happen next: PROG_UNWIND_NATIVE to unwind a native boundary
257+
// frame, PROG_UNWIND_PYTHON to tail-call back (more frames but budget
258+
// exhausted), or PROG_UNWIND_STOP when all Python frames are done.
259+
static EBPF_INLINE ErrorCode
260+
step_python(PerCPURecord *record, const PyProcInfo *pyinfo, void **py_frame, int *unwinder)
261+
{
262+
bool continue_with_next;
263+
ErrorCode error = process_python_frame(record, pyinfo, py_frame, &continue_with_next);
264+
if (error) {
265+
*unwinder = PROG_UNWIND_STOP;
266+
return error;
267+
}
268+
if (continue_with_next) {
269+
*unwinder = get_next_unwinder_after_interpreter();
270+
} else if (!*py_frame) {
271+
*unwinder = PROG_UNWIND_STOP;
272+
} else {
273+
// More Python frames but loop budget will be exhausted; tail call to self.
274+
*unwinder = PROG_UNWIND_PYTHON;
275+
}
276+
return ERR_OK;
277+
}
278+
279+
// step_native processes one native frame at an interpreter boundary and
280+
// updates *unwinder: PROG_UNWIND_PYTHON when we've crossed back into Python,
281+
// or whatever get_next_unwinder_after_native_frame returns otherwise.
282+
static EBPF_INLINE ErrorCode step_native(PerCPURecord *record, int *unwinder)
283+
{
284+
Trace *trace = &record->trace;
285+
*unwinder = PROG_UNWIND_STOP;
286+
287+
increment_metric(metricID_UnwindNativeAttempts);
288+
ErrorCode error = push_native(
289+
trace,
290+
record->state.text_section_id,
291+
record->state.text_section_offset,
292+
record->state.return_address);
293+
if (error) {
294+
return error;
295+
}
296+
297+
bool stop;
298+
error = unwind_one_frame(record, &stop);
299+
if (error || stop) {
300+
return error;
301+
}
302+
303+
return get_next_unwinder_after_native_frame(record, unwinder);
304+
}
305+
284306
// unwind_python is the entry point for tracing when invoked from the native tracer
285307
// or interpreter dispatcher. It does not reset the trace object and will append the
286308
// Python stack frames to the trace object for the current CPU.
@@ -291,7 +313,7 @@ static EBPF_INLINE int unwind_python(struct pt_regs *ctx)
291313
return -1;
292314

293315
ErrorCode error = ERR_OK;
294-
int unwinder = get_next_unwinder_after_interpreter();
316+
int unwinder = PROG_UNWIND_PYTHON;
295317
Trace *trace = &record->trace;
296318
u32 pid = trace->pid;
297319

@@ -319,7 +341,26 @@ static EBPF_INLINE int unwind_python(struct pt_regs *ctx)
319341
goto exit;
320342
}
321343

322-
error = walk_python_stack(record, pyinfo, &unwinder);
344+
{
345+
void *py_frame = record->pythonUnwindState.py_frame;
346+
347+
for (int t = 0; t < PYTHON_NATIVE_LOOP_ITERS; t++) {
348+
switch (unwinder) {
349+
case PROG_UNWIND_PYTHON: error = step_python(record, pyinfo, &py_frame, &unwinder); break;
350+
case PROG_UNWIND_NATIVE: error = step_native(record, &unwinder); break;
351+
default: goto done;
352+
}
353+
if (error) {
354+
goto done;
355+
}
356+
}
357+
358+
done:
359+
if (error || !py_frame) {
360+
unwinder_mark_done(record, PROG_UNWIND_PYTHON);
361+
}
362+
record->pythonUnwindState.py_frame = py_frame;
363+
}
323364

324365
exit:
325366
record->state.unwind_error = error;

support/ebpf/tracer.ebpf.amd64

-72.6 KB
Binary file not shown.

support/ebpf/tracer.ebpf.arm64

-66.4 KB
Binary file not shown.

0 commit comments

Comments
 (0)