From 938e424b10be5a9f8caf3ec557b8ca806d8798bc Mon Sep 17 00:00:00 2001
From: not-matthias
Date: Wed, 13 May 2026 22:01:09 -0700
Subject: [PATCH 1/3] fix(debuginfo): resolve addresses in additional R-E LOAD segments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VG_(find_DebugInfo) used only di->text_avma/text_size for address→DI
lookup, which covers the section named ".text" — not the other
executable sections (.text.warm, .text.cold, .bolt.org.text) that
BOLT-optimized binaries place in a separate R-E PT_LOAD segment.
Addresses in that second segment fell through to ob=??? even though
the address-space manager already knew they were backed by the same
file.

Fall back to VG_(am_find_nsegment) and match the segment's filename
against debugInfo_list. Only executable segments are considered, so
data addresses keep resolving exactly as before.

Reproduced on cpython-3.14 standalone (uv's distribution): obj-skip now
catches py_trampoline_evaluator, _PyFunction_Vectorcall.cold, and other
functions that previously escaped via ob=???.
---
 coregrind/m_debuginfo/debuginfo.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c
index fbfab5650..f42782925 100644
--- a/coregrind/m_debuginfo/debuginfo.c
+++ b/coregrind/m_debuginfo/debuginfo.c
@@ -2617,6 +2617,34 @@ DebugInfo* VG_(find_DebugInfo) ( DiEpoch ep, Addr a )
          return di;
       }
    }
+   /* Fallback for ELFs with multiple executable LOAD segments (e.g. BOLT-
+      optimized binaries: .bolt.org.text + .text + .text.warm + .text.cold
+      live in two separate R-E PT_LOAD segments). The text-range check above
+      only covers the section named ".text", so addresses in the other
+      executable region are missed and end up attributed to "???". Ask the
+      address-space manager which file backs this address, and match it to
+      a DebugInfo by filename.
+
+      The address-space manager only describes the mappings as they are
+      now, so the fallback is limited to queries about the current epoch.
+      Only executable segments are considered: the fallback stands in for
+      the text-range check, so it must not start answering for data
+      addresses that happen to be backed by the same file. */
+   if (eq_DiEpoch(ep, VG_(current_DiEpoch)())) {
+      const NSegment* seg = VG_(am_find_nsegment)(a);
+      const HChar* filename;
+      if (seg != NULL && seg->hasX
+          && (filename = VG_(am_get_filename)(seg)) != NULL) {
+         for (di = debugInfo_list; di != NULL; di = di->next) {
+            if (!is_DI_valid_for_epoch(di, ep))
+               continue;
+            if (di->fsm.filename != NULL
+                && 0 == VG_(strcmp)(di->fsm.filename, filename)) {
+               return di;
+            }
+         }
+      }
+   }
    return NULL;
 }
 
From 9aff0c332a8185c1d24347a7168cf89fc57b1524 Mon Sep 17 00:00:00 2001
From: not-matthias
Date: Thu, 14 May 2026 10:02:54 -0700
Subject: [PATCH 2/3] feat(callgrind): add CALLGRIND_ADD_OBJ_SKIP client request

Add a trapdoor that lets the client register obj-skip paths at runtime,
alongside the existing cmdline --obj-skip. Useful when the skip target
isn't known statically (e.g. discovering the libpython path at Python
startup).

Extract CLG_(add_obj_to_skip) so the cmdline parser and the new client
request share the same append-to-realloc'd-array logic. The helper
ignores NULL or empty paths, since the client request passes its
argument through unchecked.
---
 callgrind/callgrind.h | 10 +++++++++-
 callgrind/clo.c       | 27 +++++++++++++++++++++------
 callgrind/global.h    |  1 +
 callgrind/main.c      |  7 +++++++
 4 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/callgrind/callgrind.h b/callgrind/callgrind.h
index 88a23b274..41fae04f9 100644
--- a/callgrind/callgrind.h
+++ b/callgrind/callgrind.h
@@ -78,7 +78,8 @@ typedef
       VG_USERREQ__TOGGLE_COLLECT,
       VG_USERREQ__DUMP_STATS_AT,
       VG_USERREQ__START_INSTRUMENTATION,
-      VG_USERREQ__STOP_INSTRUMENTATION
+      VG_USERREQ__STOP_INSTRUMENTATION,
+      VG_USERREQ__ADD_OBJ_SKIP
    } Vg_CallgrindClientRequest;
 
 /* Dump current state of cost centers, and zero them afterwards */
@@ -126,4 +127,11 @@ typedef
   VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__STOP_INSTRUMENTATION,     \
                                   0, 0, 0, 0, 0)
 
+/* Add an object file path to the obj-skip list at runtime. Path matching
+   is exact (same as --obj-skip= on the command line). A NULL or empty
+   path is ignored. */
+#define CALLGRIND_ADD_OBJ_SKIP(path)                                    \
+  VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__ADD_OBJ_SKIP,             \
+                                  path, 0, 0, 0, 0)
+
 #endif /* __CALLGRIND_H */
diff --git a/callgrind/clo.c b/callgrind/clo.c
index 59b45b0db..fc2084869 100644
--- a/callgrind/clo.c
+++ b/callgrind/clo.c
@@ -400,6 +400,26 @@ void CLG_(update_fn_config)(fn_node* fn)
 }
 
 
+/* Append a private copy of obj_name to the list of objects to skip.
+   Shared by the --obj-skip= command line option and the
+   CALLGRIND_ADD_OBJ_SKIP client request. NULL or empty names are
+   ignored: they cannot name an object, and NULL must not be stored. */
+void CLG_(add_obj_to_skip)(const HChar* obj_name)
+{
+   HChar* dup;
+
+   if (obj_name == NULL || obj_name[0] == '\0')
+      return;
+
+   dup = VG_(strdup)("cl.clo.aots.1", obj_name);
+   CLG_(clo).objs_to_skip_count++;
+   CLG_(clo).objs_to_skip = VG_(realloc)("cl.clo.aots.2",
+                                         CLG_(clo).objs_to_skip,
+                                         CLG_(clo).objs_to_skip_count * sizeof(HChar*));
+   CLG_(clo).objs_to_skip[CLG_(clo).objs_to_skip_count - 1] = dup;
+}
+
+
 /*--------------------------------------------------------------------*/
 /*--- Command line processing                                      ---*/
 /*--------------------------------------------------------------------*/
@@ -431,12 +451,7 @@ Bool CLG_(process_cmd_line_option)(const HChar* arg)
        fnc->skip = CONFIG_TRUE;
    }
    else if VG_STR_CLO(arg, "--obj-skip", tmp_str) {
-       HChar *obj_name = VG_(strdup)("cl.clo.pclo.1", tmp_str);
-       CLG_(clo).objs_to_skip_count++;
-       CLG_(clo).objs_to_skip = VG_(realloc)("cl.clo.pclo.2",
-                                             CLG_(clo).objs_to_skip,
-                                             CLG_(clo).objs_to_skip_count*sizeof(HChar*));
-       CLG_(clo).objs_to_skip[CLG_(clo).objs_to_skip_count-1] = obj_name;
+       CLG_(add_obj_to_skip)(tmp_str);
    }
 
    else if VG_STR_CLO(arg, "--dump-before", tmp_str) {
diff --git a/callgrind/global.h b/callgrind/global.h
index de2d12508..c2fda1cce 100644
--- a/callgrind/global.h
+++ b/callgrind/global.h
@@ -685,6 +685,7 @@ struct event_sets {
 void CLG_(set_clo_defaults)(void);
 void CLG_(update_fn_config)(fn_node*);
 Bool CLG_(process_cmd_line_option)(const HChar*);
+void CLG_(add_obj_to_skip)(const HChar* obj_name);
 void CLG_(print_usage)(void);
 void CLG_(print_debug_usage)(void);
 
diff --git a/callgrind/main.c b/callgrind/main.c
index 82602c9ad..3761c1448 100644
--- a/callgrind/main.c
+++ b/callgrind/main.c
@@ -1680,6 +1680,13 @@ Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
      *ret = 0;                 /* meaningless */
      break;
 
+   case VG_USERREQ__ADD_OBJ_SKIP: {
+     const HChar* path = (const HChar*)args[1];
+     CLG_(add_obj_to_skip)(path);
+     *ret = 0;
+     break;
+   }
+
    case VG_USERREQ__GDB_MONITOR_COMMAND: {
      Bool handled = handle_gdb_monitor_command (tid, (HChar*)args[1]);
      if (handled)
From fe0dbfcb1bf90a170bfcdaa64e0c22c53d99d8d0 Mon Sep 17 00:00:00 2001
From: not-matthias
Date: Thu, 14 May 2026 10:18:51 -0700
Subject: [PATCH 3/3] test(callgrind): add Python regression test for runtime obj-skip

End-to-end test for CALLGRIND_ADD_OBJ_SKIP using the canonical flow:
start with --instr-atstart=no, register the libpython path via the
trapdoor, then turn instrumentation on so the skip applies from the
first instrumented BB.
---
 callgrind/tests/Makefile.am                   | 12 ++++
 callgrind/tests/runtime_obj_skip_py.post.exp  |  1 +
 callgrind/tests/runtime_obj_skip_py.py        | 70 +++++++++++++++++++
 .../tests/runtime_obj_skip_py.stderr.exp      |  6 ++
 callgrind/tests/runtime_obj_skip_py.vgtest    |  6 ++
 callgrind/tests/runtime_obj_skip_py_shim.c    | 20 ++++++
 6 files changed, 115 insertions(+)
 create mode 100644 callgrind/tests/runtime_obj_skip_py.post.exp
 create mode 100644 callgrind/tests/runtime_obj_skip_py.py
 create mode 100644 callgrind/tests/runtime_obj_skip_py.stderr.exp
 create mode 100644 callgrind/tests/runtime_obj_skip_py.vgtest
 create mode 100644 callgrind/tests/runtime_obj_skip_py_shim.c

diff --git a/callgrind/tests/Makefile.am b/callgrind/tests/Makefile.am
index bc3584a8e..79a076042 100644
--- a/callgrind/tests/Makefile.am
+++ b/callgrind/tests/Makefile.am
@@ -10,6 +10,8 @@ EXTRA_DIST = \
 	ann1.post.exp ann1.stderr.exp ann1.vgtest \
 	ann2.post.exp ann2.stderr.exp ann2.vgtest \
 	clreq.vgtest clreq.stderr.exp \
+	runtime_obj_skip_py.vgtest runtime_obj_skip_py.stderr.exp runtime_obj_skip_py.post.exp \
+	runtime_obj_skip_py.py runtime_obj_skip_py_shim.c \
 	bug497723.stderr.exp bug497723.post.exp bug497723.vgtest \
 	simwork1.vgtest simwork1.stdout.exp simwork1.stderr.exp \
 	simwork2.vgtest simwork2.stdout.exp simwork2.stderr.exp \
@@ -38,3 +40,13 @@ inline_samefile_CFLAGS = $(AM_CFLAGS) -O2 -g
 inline_crossfile_CFLAGS = $(AM_CFLAGS) -O2 -g
 
 threads_LDADD = -lpthread
+
+# Shim loaded by runtime_obj_skip_py.py via ctypes. Built unconditionally;
+# the test's prereq skips it if the .so is missing.
+check_DATA = runtime_obj_skip_py_shim.so
+
+runtime_obj_skip_py_shim.so: runtime_obj_skip_py_shim.c
+	$(CC) -shared -fPIC -O2 -I$(top_srcdir) -I$(top_srcdir)/include \
+		$< -o $@
+
+CLEANFILES = runtime_obj_skip_py_shim.so
diff --git a/callgrind/tests/runtime_obj_skip_py.post.exp b/callgrind/tests/runtime_obj_skip_py.post.exp
new file mode 100644
index 000000000..d86bac9de
--- /dev/null
+++ b/callgrind/tests/runtime_obj_skip_py.post.exp
@@ -0,0 +1 @@
+OK
diff --git a/callgrind/tests/runtime_obj_skip_py.py b/callgrind/tests/runtime_obj_skip_py.py
new file mode 100644
index 000000000..1c2b5b748
--- /dev/null
+++ b/callgrind/tests/runtime_obj_skip_py.py
@@ -0,0 +1,70 @@
+"""Resolve libpython at runtime via sysconfig (mirrors pytest-codspeed's
+approach in instruments/walltime.py), register it for obj-skip via the
+client-request shim, then turn instrumentation on and run a small
+integer workload.
+
+We pass both the sysconfig path AND its os.path.realpath because
+callgrind stores the realpath in obj_node->name (after symlink
+resolution), and the runtime obj-skip check uses exact strcmp."""
+
+import ctypes
+import os
+import sys
+import sysconfig
+
+
+def libpython_candidates() -> list[str]:
+    ldlibrary = sysconfig.get_config_var("LDLIBRARY")
+    libdir = sysconfig.get_config_var("LIBDIR")
+    paths: list[str] = []
+    if ldlibrary and libdir:
+        paths.append(os.path.join(libdir, ldlibrary))
+    if ldlibrary:
+        paths.append(os.path.join(sys.prefix, "lib", ldlibrary))
+    # Add realpath variants so the exact-match obj-skip finds the
+    # file under whichever name the loader actually mapped.
+    resolved: list[str] = []
+    seen: set[str] = set()
+    for p in paths:
+        if not p:
+            continue
+        if p not in seen and os.path.exists(p):
+            resolved.append(p)
+            seen.add(p)
+        try:
+            r = os.path.realpath(p)
+        except OSError:
+            continue
+        if r not in seen and os.path.exists(r):
+            resolved.append(r)
+            seen.add(r)
+    return resolved
+
+
+def main() -> None:
+    here = os.path.dirname(os.path.abspath(__file__))
+    shim = ctypes.CDLL(os.path.join(here, "runtime_obj_skip_py_shim.so"))
+    shim.add_obj_skip.argtypes = [ctypes.c_char_p]
+    shim.add_obj_skip.restype = None
+    shim.start_instr.argtypes = []
+    shim.start_instr.restype = None
+    shim.stop_instr.argtypes = []
+    shim.stop_instr.restype = None
+
+    for path in libpython_candidates():
+        shim.add_obj_skip(path.encode())
+    if sys.executable:
+        shim.add_obj_skip(sys.executable.encode())
+        real = os.path.realpath(sys.executable)
+        if real != sys.executable:
+            shim.add_obj_skip(real.encode())
+
+    shim.start_instr()
+    acc = 0
+    for i in range(10_000):
+        acc = (acc + i * i) ^ (i << 1)
+    shim.stop_instr()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/callgrind/tests/runtime_obj_skip_py.stderr.exp b/callgrind/tests/runtime_obj_skip_py.stderr.exp
new file mode 100644
index 000000000..d0b7820ae
--- /dev/null
+++ b/callgrind/tests/runtime_obj_skip_py.stderr.exp
@@ -0,0 +1,6 @@
+
+
+Events    : Ir
+Collected :
+
+I   refs:
diff --git a/callgrind/tests/runtime_obj_skip_py.vgtest b/callgrind/tests/runtime_obj_skip_py.vgtest
new file mode 100644
index 000000000..9e45c997e
--- /dev/null
+++ b/callgrind/tests/runtime_obj_skip_py.vgtest
@@ -0,0 +1,6 @@
+prereq: command -v python3 >/dev/null && test -f runtime_obj_skip_py_shim.so
+prog-asis: python3
+args: runtime_obj_skip_py.py
+vgopts: --instr-atstart=no --compress-strings=no --callgrind-out-file=callgrind.out.runtime_obj_skip_py
+post: sh -c 'c=$(awk "/^ob=/{p=(\$0~/libpython/)} /^fn=/&&p{c++} END{print c+0}" callgrind.out.runtime_obj_skip_py); if [ "$c" -lt 100 ]; then echo OK; else echo "FAIL libpython fns=$c"; fi'
+cleanup: rm -f callgrind.out.runtime_obj_skip_py
diff --git a/callgrind/tests/runtime_obj_skip_py_shim.c b/callgrind/tests/runtime_obj_skip_py_shim.c
new file mode 100644
index 000000000..25e69ca79
--- /dev/null
+++ b/callgrind/tests/runtime_obj_skip_py_shim.c
@@ -0,0 +1,20 @@
+/* Shim so Python (via ctypes) can issue callgrind client requests.
+   The requests themselves are inline asm and unreachable from pure
+   Python; this file just wraps them in regular C functions. */
+
+#include "../callgrind.h"
+
+void add_obj_skip(const char* path)
+{
+    CALLGRIND_ADD_OBJ_SKIP(path);
+}
+
+void start_instr(void)
+{
+    CALLGRIND_START_INSTRUMENTATION;
+}
+
+void stop_instr(void)
+{
+    CALLGRIND_STOP_INSTRUMENTATION;
+}