Skip to content

Commit 4426b14

Browse files
Reduce CallLogger tracing overhead by 8.4×
Rotoscope's CallLogger adds significant overhead when tracing method-call-heavy code — users reported it doubling test execution time. This was primarily caused by excessive Ruby↔C transitions, per-event memory allocations, and redundant work in the hot path that fires for every traced method call. Key changes: **Move CSV formatting entirely to C** — The old Ruby log_call method made ~10 round trips to C per event (calling receiver_class_name, method_name, caller_path, etc. individually). A new C function builds the entire CSV line without returning to Ruby, then writes it into a shared buffer that flushes at 64KB. **Native logger bypass** — CallLogger now registers its IO/excludelist directly on the C struct via set_native_logger. The event_hook calls the C formatting function directly, never entering Ruby for the CallLogger path. **Stack-derived caller info** — Instead of calling rb_profile_frames (which walks Ruby's internal stack) for every event, derive the caller's method name and singleton status from our own internal stack. rb_profile_frames is now only called for Ruby CALL events where we need the caller's file path and line number. **Eliminate wasted work** — Skip the excludelist regex match when no exclusions are configured (was calling rb_funcall 1.4M times for an empty pattern that never matches). Cache class names in stack frames. Pre-allocate frozen string constants. Before: 2,111ms overhead (59× ratio) on method-call-heavy workload After: 250ms overhead (10× ratio) A test suite that took 20s with tracing now takes ~11s. 41 tests, 59 assertions, 0 failures, 0 errors. 100% Ruby line coverage. No memory leaks (macOS leaks tool). No use-after-free (Guard Malloc). No buffer overflows.
1 parent 2880844 commit 4426b14

14 files changed

Lines changed: 901 additions & 160 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ lib/rotoscope/rotoscope.so
66
.rubocop-*
77
Gemfile.lock
88
.bundle
9+
coverage/

Rakefile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,13 @@ task :rubocop do
5050
RuboCop::RakeTask.new
5151
end
5252

53+
# ==========================================================
54+
# Benchmarking
55+
# ==========================================================
56+
57+
desc "Run benchmarks (set BENCH_N for iterations, default 10000)"
58+
task bench: :build do
59+
ruby "bench/benchmark.rb"
60+
end
61+
5362
task(default: [:test, :rubocop])

ext/rotoscope/callsite.c

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,49 +4,37 @@
44
#include <ruby/debug.h>
55
#include <stdbool.h>
66

7-
static VALUE caller_frame(int *line, bool ruby_call) {
8-
VALUE frames[2] = {Qnil, Qnil};
9-
int lines[2] = {0, 0};
7+
static VALUE caller_frame(int *line) {
8+
VALUE frame = Qnil;
9+
int frame_line = 0;
1010

11-
// At this point, for ruby calls, the top ruby stack frame is
12-
// for the method being called, so we want to skip that frame
13-
// and get the caller location. This is why we use 1 for ruby
14-
// calls.
15-
//
16-
// However, for C call, the top stack frame is for the caller,
17-
// so we don't want to have to skip over any extra stack frames
18-
// for C calls.
19-
int frame_index = ruby_call ? 1 : 0;
11+
// For ruby calls, the top frame is the callee — skip it to get the caller.
12+
// Ruby 4.0+ fixed rb_profile_frames start argument (ruby-lang bug #14607).
13+
rb_profile_frames(1, 1, &frame, &frame_line);
2014

21-
// There is currently a bug in rb_profile_frames that
22-
// causes the start argument to effectively always
23-
// act as if it were 0, so we need to also get the top
24-
// frame. (https://bugs.ruby-lang.org/issues/14607)
25-
rb_profile_frames(0, frame_index + 1, frames, lines);
26-
27-
*line = lines[frame_index];
28-
return frames[frame_index];
15+
*line = frame_line;
16+
return frame;
2917
}
3018

3119
rs_callsite_t c_callsite(rb_trace_arg_t *trace_arg) {
32-
int line;
33-
VALUE frame = caller_frame(&line, false);
3420
return (rs_callsite_t){
3521
.filepath = rb_tracearg_path(trace_arg),
3622
.lineno = FIX2INT(rb_tracearg_lineno(trace_arg)),
37-
.method_name = rb_profile_frame_method_name(frame),
38-
.singleton_p = rb_profile_frame_singleton_method_p(frame),
23+
.method_name = Qnil,
24+
.singleton_p = Qnil,
25+
.profile_frame = Qnil,
3926
};
4027
}
4128

4229
rs_callsite_t ruby_callsite() {
4330
int line;
44-
VALUE frame = caller_frame(&line, true);
31+
VALUE frame = caller_frame(&line);
4532

4633
return (rs_callsite_t){
4734
.filepath = rb_profile_frame_path(frame),
4835
.lineno = line,
49-
.method_name = rb_profile_frame_method_name(frame),
50-
.singleton_p = rb_profile_frame_singleton_method_p(frame),
36+
.method_name = Qnil, // filled from stack or profile_frame in event_hook
37+
.singleton_p = Qnil,
38+
.profile_frame = frame,
5139
};
5240
}

ext/rotoscope/callsite.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ typedef struct {
99
unsigned int lineno;
1010
VALUE method_name;
1111
VALUE singleton_p;
12+
VALUE profile_frame; // for lazy method_name/singleton_p extraction
1213
} rs_callsite_t;
1314

1415
rs_callsite_t c_callsite(rb_trace_arg_t *trace_arg);

ext/rotoscope/method_desc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ VALUE rs_method_class(rs_method_desc_t *method) {
88
void rs_method_desc_mark(rs_method_desc_t *method) {
99
rb_gc_mark(method->receiver);
1010
rb_gc_mark(method->id);
11+
rb_gc_mark(method->class_name);
1112
}

ext/rotoscope/method_desc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
typedef struct rs_method_desc_t {
88
VALUE receiver;
99
VALUE id;
10+
VALUE class_name;
1011
bool singleton_p;
1112
} rs_method_desc_t;
1213

0 commit comments

Comments
 (0)