Skip to content

Commit 8a35d79

Browse files
committed
feat(callgrind): capture inline function calls into the callgrind out file
1 parent 3f9d0e9 commit 8a35d79

19 files changed

Lines changed: 372 additions & 17 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@
158158
/callgrind/tests/clreq
159159
/callgrind/tests/simwork
160160
/callgrind/tests/threads
161+
/callgrind/tests/inline-samefile
162+
/callgrind/tests/inline-crossfile
161163

162164
# /coregrind/
163165
/coregrind/*.a

CODSPEED-CHANGELOG.md

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# CodSpeed Valgrind Changelog
2+
3+
This file documents changes made to Valgrind for CodSpeed integration, beyond the baseline Valgrind distribution.
4+
5+
6+
### Callgrind: Inline Function Tracking
7+
8+
**Feature**: Added `cfni=` (called function name, inline) markers to Callgrind output to track execution within inlined functions.
9+
10+
**Usage**: Enable with the `--read-inline-info=yes` flag.
11+
12+
**How it works**:
13+
- Reads DWARF inline location table (`DW_TAG_inlined_subroutine`) from debug info
14+
- Outputs `cfni=function_name` when entering an inlined function
15+
- Outputs `cfni=???` when leaving inline code and returning to non-inlined code
16+
- Tracks transitions between different inlined functions
17+
- Works seamlessly with file context markers (`fi=`, `fe=`)
18+
19+
**Example 1: Same-file inlining**
20+
21+
Source code:
22+
```c
23+
static inline int compute_sum(int x) {
24+
int sum = 0;
25+
for (int i = 0; i < x; i++) {
26+
sum += i;
27+
}
28+
return sum;
29+
}
30+
31+
int main() {
32+
int result = compute_sum(10); // This call will be inlined
33+
printf("result=%d\n", result);
34+
return 0;
35+
}
36+
```
37+
38+
Callgrind output (excerpt):
39+
```
40+
fn=main
41+
12 1
42+
-1 1
43+
cfni=compute_sum ← Entering inlined function
44+
-6 3
45+
-1 3
46+
+1 4
47+
+1 1
48+
-1 3
49+
cfni=??? ← Leaving inlined function
50+
+13 1
51+
+1 3
52+
cfn=printf ← Regular function call (not inlined)
53+
```
54+
55+
**Example 2: Cross-file inlining**
56+
57+
Source files:
58+
```c
59+
// helper.h
60+
static inline int add_five(int x) {
61+
return x + 5;
62+
}
63+
64+
// main.c
65+
#include "helper.h"
66+
int main() {
67+
int result = add_five(10); // Inlined from helper.h
68+
printf("result=%d\n", result);
69+
return 0;
70+
}
71+
```
72+
73+
Callgrind output (excerpt):
74+
```
75+
fn=main
76+
fi=helper.h ← File context changes to header
77+
cfni=add_five ← Entering inlined function from header
78+
+2 1
79+
fe=main.c ← Returning to original file
80+
cfni=??? ← Leaving inlined function
81+
+1 3
82+
cfn=printf
83+
```

callgrind/dump.c

Lines changed: 86 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,7 @@ static Addr debug_cache_addr[DEBUG_CACHE_SIZE];
344344
static file_node* debug_cache_file[DEBUG_CACHE_SIZE];
345345
static int debug_cache_line[DEBUG_CACHE_SIZE];
346346
static Bool debug_cache_info[DEBUG_CACHE_SIZE];
347+
static const HChar* debug_cache_inlfn[DEBUG_CACHE_SIZE];
347348

348349
static __inline__
349350
void init_debug_cache(void)
@@ -354,6 +355,7 @@ void init_debug_cache(void)
354355
debug_cache_file[i] = 0;
355356
debug_cache_line[i] = 0;
356357
debug_cache_info[i] = 0;
358+
debug_cache_inlfn[i] = 0;
357359
}
358360
}
359361

@@ -386,6 +388,15 @@ Bool get_debug_pos(BBCC* bbcc, Addr addr, AddrPos* p)
386388
debug_cache_addr[cachepos] = addr;
387389
debug_cache_line[cachepos] = p->line;
388390
debug_cache_file[cachepos] = p->file;
391+
392+
/* Query inline info at the same time we query file/line */
393+
const HChar* inl_fn = 0;
394+
Bool has_inline = VG_(get_inline_fnname)(ep, addr, &inl_fn);
395+
if (has_inline) {
396+
debug_cache_inlfn[cachepos] = inl_fn;
397+
} else {
398+
debug_cache_inlfn[cachepos] = (const HChar*)(-1);
399+
}
389400
}
390401

391402
/* Address offset from bbcc start address */
@@ -399,6 +410,44 @@ Bool get_debug_pos(BBCC* bbcc, Addr addr, AddrPos* p)
399410
return found_file_line;
400411
}
401412

413+
/* Get inline function name for an address, with caching.
414+
* Returns True if address is in an inlined function, False otherwise.
415+
* If True, *inl_fn will be set to the inline function name.
416+
*/
417+
static Bool get_inline_info(Addr addr, const HChar** inl_fn)
418+
{
419+
int cachepos = addr % DEBUG_CACHE_SIZE;
420+
421+
/* Check cache first - but only if inline info was already queried for this address */
422+
if (debug_cache_addr[cachepos] == addr && debug_cache_inlfn[cachepos] != 0) {
423+
/* We have cached inline info for this address */
424+
if (debug_cache_inlfn[cachepos] == (const HChar*)(-1)) {
425+
/* Special marker: no inline function at this address */
426+
*inl_fn = 0;
427+
return False;
428+
}
429+
*inl_fn = debug_cache_inlfn[cachepos];
430+
return True;
431+
}
432+
433+
DiEpoch ep = VG_(current_DiEpoch)();
434+
Bool has_inline = VG_(get_inline_fnname)(ep, addr, inl_fn);
435+
436+
if (has_inline) {
437+
/* Cache the inline function name */
438+
debug_cache_inlfn[cachepos] = *inl_fn;
439+
} else {
440+
*inl_fn = 0;
441+
/* Use special marker -1 to indicate "no inline function" */
442+
debug_cache_inlfn[cachepos] = (const HChar*)(-1);
443+
}
444+
445+
CLG_DEBUG(3, " get_inline_info(%#lx): %s\n",
446+
addr, has_inline ? *inl_fn : "(not inlined)");
447+
448+
return has_inline;
449+
}
450+
402451

403452
/* copy file position and init cost */
404453
static void init_apos(AddrPos* p, Addr addr, Addr bbaddr, file_node* file)
@@ -426,13 +475,15 @@ static void init_fcost(AddrCost* c, Addr addr, Addr bbaddr, file_node* file)
426475
CLG_(init_cost)( CLG_(sets).full, c->cost );
427476
}
428477

478+
/* Track last inline function to avoid repeated cfni= output */
479+
static const HChar* last_inline_fn = 0;
429480

430481
/**
431482
* print position change inside of a BB (last -> curr)
432483
* this doesn't update last to curr!
433484
*/
434485
static void fprint_apos(VgFile *fp, AddrPos* curr, AddrPos* last,
435-
file_node* func_file)
486+
file_node* func_file, BBCC* bbcc)
436487
{
437488
CLG_ASSERT(curr->file != 0);
438489
CLG_DEBUG(2, " print_apos(file '%s', line %u, bb %#lx, addr %#lx) fnFile '%s'\n",
@@ -448,6 +499,24 @@ static void fprint_apos(VgFile *fp, AddrPos* curr, AddrPos* last,
448499
print_file(fp, "fi=", curr->file);
449500
}
450501

502+
/* Check inline function for this position and output cfni= if changed */
503+
if (bbcc) {
504+
Addr curr_addr = curr->addr + bbcc->bb->obj->offset;
505+
const HChar* inline_fn = 0;
506+
Bool is_inline = get_inline_info(curr_addr, &inline_fn);
507+
508+
/* Output cfni= if inline function changed */
509+
if (is_inline && inline_fn && inline_fn != last_inline_fn) {
510+
VG_(fprintf)(fp, "cfni=%s\n", inline_fn);
511+
last_inline_fn = inline_fn;
512+
}
513+
/* Clear last_inline_fn if we're no longer in inline code */
514+
else if (!is_inline && last_inline_fn) {
515+
VG_(fprintf)(fp, "cfni=???\n");
516+
last_inline_fn = 0;
517+
}
518+
}
519+
451520
if (CLG_(clo).dump_bbs) {
452521
if (curr->line != last->line) {
453522
VG_(fprintf)(fp, "ln=%u\n", curr->line);
@@ -704,8 +773,8 @@ static Bool fprint_bbcc(VgFile *fp, BBCC* bbcc, AddrPos* last)
704773
/* get debug info of current instruction address and dump cost
705774
* if CLG_(clo).dump_bbs or file/line has changed
706775
*/
707-
if (!get_debug_pos(bbcc, bb_addr(bb) + instr_info->instr_offset,
708-
&(newCost->p))) {
776+
Addr instr_addr = bb_addr(bb) + instr_info->instr_offset;
777+
if (!get_debug_pos(bbcc, instr_addr, &(newCost->p))) {
709778
/* if we don't have debug info, don't switch to file "???" */
710779
newCost->p.file = bbcc->cxt->fn[0]->file;
711780
}
@@ -716,8 +785,10 @@ static Bool fprint_bbcc(VgFile *fp, BBCC* bbcc, AddrPos* last)
716785

717786
if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
718787
something_written = True;
719-
720-
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
788+
789+
/* Output file position and inline function markers */
790+
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file, bbcc);
791+
721792
fprint_fcost(fp, currCost, last);
722793
}
723794

@@ -741,11 +812,11 @@ static Bool fprint_bbcc(VgFile *fp, BBCC* bbcc, AddrPos* last)
741812
if (jcc_count>0) {
742813
if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
743814
/* no need to switch buffers, as position is the same */
744-
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
815+
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file, bbcc);
745816
fprint_fcost(fp, currCost, last);
746817
}
747818
get_debug_pos(bbcc, bb_addr(bb)+instr_info->instr_offset, &(currCost->p));
748-
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
819+
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file, bbcc);
749820
something_written = True;
750821
for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
751822
if (((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
@@ -773,17 +844,17 @@ static Bool fprint_bbcc(VgFile *fp, BBCC* bbcc, AddrPos* last)
773844
}
774845

775846
if ( (bbcc->skipped &&
776-
!CLG_(is_zero_cost)(CLG_(sets).full, bbcc->skipped)) ||
847+
!CLG_(is_zero_cost)(CLG_(sets).full, bbcc->skipped)) ||
777848
(jcc_count>0) ) {
778-
849+
779850
if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
780851
/* no need to switch buffers, as position is the same */
781-
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
852+
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file, bbcc);
782853
fprint_fcost(fp, currCost, last);
783854
}
784855

785856
get_debug_pos(bbcc, bb_jmpaddr(bb), &(currCost->p));
786-
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
857+
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file, bbcc);
787858
something_written = True;
788859

789860
/* first, print skipped costs for calls */
@@ -810,8 +881,8 @@ static Bool fprint_bbcc(VgFile *fp, BBCC* bbcc, AddrPos* last)
810881
if (CLG_(clo).dump_bbs || CLG_(clo).dump_bb) {
811882
if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
812883
something_written = True;
813-
814-
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
884+
885+
fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file, bbcc);
815886
fprint_fcost(fp, currCost, last);
816887
}
817888
if (CLG_(clo).dump_bbs) VG_(fprintf)(fp, "\n");
@@ -1417,7 +1488,7 @@ static void print_bbccs_of_thread(thread_info* ti)
14171488
if (!CLG_(is_zero_cost)( CLG_(sets).full, ccSum[currSum].cost )) {
14181489
/* no need to switch buffers, as position is the same */
14191490
fprint_apos(print_fp, &(ccSum[currSum].p), &lastAPos,
1420-
lastFnPos.cxt->fn[0]->file);
1491+
lastFnPos.cxt->fn[0]->file, 0);
14211492
fprint_fcost(print_fp, &ccSum[currSum], &lastAPos);
14221493
}
14231494

@@ -1437,6 +1508,7 @@ static void print_bbccs_of_thread(thread_info* ti)
14371508
init_fcost(&ccSum[0], 0, 0, 0);
14381509
init_fcost(&ccSum[1], 0, 0, 0);
14391510
currSum = 0;
1511+
last_inline_fn = 0; /* reset inline function tracking */
14401512
}
14411513

14421514
if (CLG_(clo).dump_bbs) {

callgrind/tests/Makefile.am

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
44
SUBDIRS = .
55
DIST_SUBDIRS = .
66

7-
dist_noinst_SCRIPTS = filter_stderr
7+
dist_noinst_SCRIPTS = filter_stderr filter_inline
88

99
EXTRA_DIST = \
1010
ann1.post.exp ann1.stderr.exp ann1.vgtest \
@@ -23,11 +23,18 @@ EXTRA_DIST = \
2323
notpower2-use.vgtest notpower2-use.stderr.exp \
2424
threads.vgtest threads.stderr.exp \
2525
threads-use.vgtest threads-use.stderr.exp \
26-
find-source.vgtest find-source.stderr.exp find-source.post.exp
26+
find-source.vgtest find-source.stderr.exp find-source.post.exp \
27+
inline-samefile.vgtest inline-samefile.stderr.exp inline-samefile.stdout.exp inline-samefile.post.exp \
28+
inline-crossfile.vgtest inline-crossfile.stderr.exp inline-crossfile.stdout.exp inline-crossfile.post.exp \
29+
inline-crossfile-helper1.h inline-crossfile-helper2.h filter_inline
2730

28-
check_PROGRAMS = clreq simwork threads
31+
check_PROGRAMS = clreq simwork threads inline-samefile inline-crossfile
2932

3033
AM_CFLAGS += $(AM_FLAG_M3264_PRI)
3134
AM_CXXFLAGS += $(AM_FLAG_M3264_PRI)
3235

36+
# Inline tests need -O2 to enable inlining and -g for debug info
37+
inline_samefile_CFLAGS = $(AM_CFLAGS) -O2 -g
38+
inline_crossfile_CFLAGS = $(AM_CFLAGS) -O2 -g
39+
3340
threads_LDADD = -lpthread

callgrind/tests/filter_inline

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/bin/sh
2+
3+
# Filter script to extract inline function markers (cfni=) from callgrind output.
4+
# Usage: filter_inline <callgrind.out.file> <test_name>
#   $1: path of the callgrind output file handed to awk below
#   $2: test name; stored in test_name but not referenced anywhere in this script
5+
6+
CGout=$1
7+
test_name=$2
8+
9+
# Scan only the section that starts at a "fn=main" line and ends at the next
# "fn=" line for another function. Inside it, print cfni= and named cfn= lines
# unchanged, print fi=/fe= lines with the path reduced to its last component,
# and drop cfn=0x... entries. A header line is printed each time "fn=main" is seen.
10+
awk '
11+
/^fn=main$/ {
12+
in_main = 1
13+
print "=== Main Function Inline Markers ==="
14+
next
15+
}
16+
17+
in_main && /^fn=/ && !/^fn=main$/ {
18+
in_main = 0
19+
}
20+
21+
in_main {
22+
# Print file context markers (fi=, fe=), inline markers (cfni=), and function calls (cfn=)
23+
if (/^fi=/ || /^fe=/ || /^cfni=/ || /^cfn=/) {
24+
# Extract just the filename from file paths for readability
25+
if (/^fi=/ || /^fe=/) {
26+
marker = substr($0, 1, 3)
27+
path = substr($0, 4)
28+
# Get basename
29+
n = split(path, parts, "/")
30+
print marker parts[n]
31+
}
32+
# Skip address-only cfn= entries (not related to inline tracking)
33+
else if (/^cfn=0x/) {
34+
# Skip
35+
}
36+
else {
37+
print $0
38+
}
39+
}
40+
}
41+
' "$CGout"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#ifndef HELPER1_H
2+
#define HELPER1_H
3+
4+
// Inline function from helper1.h: returns 0 + 1 + ... + (x - 1), or 0 when x <= 0.
5+
static inline int compute_sum(int x) {
6+
int sum = 0;
7+
for (int i = 0; i < x; i++) {
8+
sum += i;
9+
}
10+
return sum;
11+
}
12+
13+
#endif
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#ifndef HELPER2_H
2+
#define HELPER2_H
3+
4+
// Inline function from helper2.h: returns 1 * 2 * ... * x, or 1 when x < 1.
// NOTE(review): the product is accumulated in an int; presumably callers pass small x so it does not overflow - confirm.
5+
static inline int compute_product(int x) {
6+
int prod = 1;
7+
for (int i = 1; i <= x; i++) {
8+
prod *= i;
9+
}
10+
return prod;
11+
}
12+
13+
#endif

0 commit comments

Comments
 (0)