Skip to content

Commit 1b5c4dc

Browse files
committed
ZJIT: Add invalidation duration events with backtraces to FXT trace
1 parent 52169ef commit 1b5c4dc

3 files changed

Lines changed: 91 additions & 23 deletions

File tree

zjit/src/invariants.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
use std::{collections::{HashMap, HashSet}, mem};
44

5-
use crate::{backend::lir::{Assembler, asm_comment}, cruby::{ID, IseqPtr, RedefinitionFlag, VALUE, iseq_name, rb_callable_method_entry_t, rb_gc_location, ruby_basic_operators, src_loc, with_vm_lock}, hir::Invariant, options::debug, state::{ZJITState, zjit_enabled_p}, virtualmem::CodePtr};
5+
use crate::{backend::lir::{Assembler, asm_comment}, cruby::{ID, IseqPtr, RedefinitionFlag, VALUE, iseq_name, rb_callable_method_entry_t, rb_gc_location, ruby_basic_operators, src_loc, with_vm_lock}, hir::Invariant, options::debug, state::{ZJITState, zjit_enabled_p, trace_invalidation}, virtualmem::CodePtr};
66
use crate::payload::{IseqVersionRef, get_or_create_iseq_payload};
77
use crate::codegen::invalidate_iseq_version;
88
use crate::cruby::rb_iseq_reset_jit_func;
@@ -12,7 +12,7 @@ use crate::gc::remove_gc_offsets;
1212

1313
macro_rules! compile_patch_points {
1414
($cb:expr, $patch_points:expr, $cause:ident, $($comment_args:tt)*) => {
15-
with_time_stat(invalidation_time_ns, || {
15+
trace_invalidation(&format!($($comment_args)*), || with_time_stat(invalidation_time_ns, || {
1616
for patch_point in $patch_points {
1717
let written_range = $cb.with_write_ptr(patch_point.patch_point_ptr, |cb| {
1818
let mut asm = Assembler::new();
@@ -35,7 +35,7 @@ macro_rules! compile_patch_points {
3535
}
3636
}
3737
}
38-
});
38+
}));
3939
};
4040
}
4141

zjit/src/state.rs

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,5 +498,48 @@ pub extern "C" fn rb_zjit_record_exit_stack(reason: *const std::ffi::c_char) {
498498
unsafe { std::ffi::CStr::from_ptr(reason).to_str().unwrap_or("unknown") }
499499
};
500500

501-
tracer.write_event(reason_str, &frames);
501+
tracer.write_event("side_exit", reason_str, &frames);
502+
}
503+
504+
/// Wrap a closure in a Perfetto duration event with category "invalidation"
505+
/// and a Ruby backtrace captured on the begin event.
506+
pub fn trace_invalidation<F, R>(reason: &str, func: F) -> R where F: FnOnce() -> R {
507+
if !get_option!(trace_compiles) {
508+
return func();
509+
}
510+
511+
// Capture backtrace and emit begin event before patching
512+
let frames = capture_ruby_frames();
513+
if let Some(tracer) = ZJITState::get_tracer() {
514+
let ts = tracer.elapsed_ns();
515+
tracer.write_duration_begin("invalidation", reason, ts, &frames);
516+
}
517+
518+
let result = func();
519+
520+
if let Some(tracer) = ZJITState::get_tracer() {
521+
let ts = tracer.elapsed_ns();
522+
tracer.write_duration_end("invalidation", reason, ts);
523+
}
524+
result
525+
}
526+
527+
/// Capture the current Ruby call stack as human-readable frame labels.
528+
fn capture_ruby_frames() -> Vec<String> {
529+
const BUFF_LEN: usize = 2048;
530+
let mut frames_buffer = vec![VALUE(0_usize); BUFF_LEN];
531+
let mut lines_buffer = vec![0i32; BUFF_LEN];
532+
533+
let stack_length = unsafe {
534+
rb_profile_frames(
535+
0,
536+
BUFF_LEN as i32,
537+
frames_buffer.as_mut_ptr(),
538+
lines_buffer.as_mut_ptr(),
539+
)
540+
};
541+
542+
(0..stack_length as usize)
543+
.map(|i| resolve_frame_label(frames_buffer[i]))
544+
.collect()
502545
}

zjit/src/stats.rs

Lines changed: 44 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -993,19 +993,20 @@ pub fn zjit_alloc_bytes() -> usize {
993993
jit::GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)
994994
}
995995

996-
/// Record a Perfetto duration-complete event spanning the execution of `func`.
997-
/// Nested calls produce nested events on the same thread in the trace viewer.
996+
/// Record a Perfetto duration event spanning the execution of `func`.
997+
/// Uses Begin/End pairs so nested calls produce properly nested slices.
998998
pub fn trace_compile_phase<F, R>(name: &str, func: F) -> R where F: FnOnce() -> R {
999999
if !get_option!(trace_compiles) {
10001000
return func();
10011001
}
1002-
let start_ns = ZJITState::get_tracer().map(|t| t.elapsed_ns());
1002+
if let Some(tracer) = ZJITState::get_tracer() {
1003+
let ts = tracer.elapsed_ns();
1004+
tracer.write_duration_begin("compile", name, ts, &[]);
1005+
}
10031006
let result = func();
1004-
if let Some(start_ns) = start_ns {
1005-
if let Some(tracer) = ZJITState::get_tracer() {
1006-
let end_ns = tracer.elapsed_ns();
1007-
tracer.write_duration_complete("compile", name, start_ns, end_ns);
1008-
}
1007+
if let Some(tracer) = ZJITState::get_tracer() {
1008+
let ts = tracer.elapsed_ns();
1009+
tracer.write_duration_end("compile", name, ts);
10091010
}
10101011
result
10111012
}
@@ -1078,6 +1079,7 @@ impl PerfettoTracer {
10781079
// Pre-intern common strings
10791080
tracer.intern_string("side_exit");
10801081
tracer.intern_string("compile");
1082+
tracer.intern_string("invalidation");
10811083
// Pre-intern argument names "0".."14" for per-frame arguments
10821084
for i in 0..15u32 {
10831085
tracer.intern_string(&i.to_string());
@@ -1130,36 +1132,59 @@ impl PerfettoTracer {
11301132
self.start_time.elapsed().as_nanos() as u64
11311133
}
11321134

1133-
/// Write a Duration Complete event (FXT event type 4).
1134-
/// Records an event spanning from `start_ns` to `end_ns` (both in ticks
1135-
/// relative to `self.start_time`).
1136-
pub fn write_duration_complete(&mut self, category: &str, name: &str, start_ns: u64, end_ns: u64) {
1135+
/// Write a Duration Begin event (FXT event type 2) with optional frame arguments.
1136+
pub fn write_duration_begin(&mut self, category: &str, name: &str, ts_ns: u64, frames: &[String]) {
1137+
self.write_duration_event(2, category, name, ts_ns, frames);
1138+
}
1139+
1140+
/// Write a Duration End event (FXT event type 3).
1141+
pub fn write_duration_end(&mut self, category: &str, name: &str, ts_ns: u64) {
1142+
self.write_duration_event(3, category, name, ts_ns, &[]);
1143+
}
1144+
1145+
/// Write a Duration Begin or End event with optional frame arguments.
1146+
fn write_duration_event(&mut self, event_type: u64, category: &str, name: &str, ts_ns: u64, frames: &[String]) {
11371147
let category_ref = self.intern_string(category);
11381148
let name_ref = self.intern_string(name);
11391149

1140-
// Duration Complete: header + start_ts + end_ts = 3 words, no args
1141-
let event_words: u64 = 3;
1150+
let n_args = frames.len().min(15) as u64;
1151+
let mut frame_refs: Vec<(u16, u16)> = Vec::with_capacity(n_args as usize);
1152+
for (i, frame) in frames.iter().take(15).enumerate() {
1153+
let fname_ref = self.intern_string(&i.to_string());
1154+
let value_ref = self.intern_string(frame);
1155+
frame_refs.push((fname_ref, value_ref));
1156+
}
1157+
1158+
let event_words: u64 = 2 + n_args;
11421159
let header: u64 = 4u64 // record type = event
11431160
| (event_words << 4) // record size
1144-
| (4u64 << 16) // event type = duration complete
1161+
| (event_type << 16) // event type = begin or end
1162+
| (n_args << 20) // argument count
11451163
| (1u64 << 24) // thread_ref = 1
11461164
| ((category_ref as u64) << 32)
11471165
| ((name_ref as u64) << 48);
11481166
self.write_word(header);
1149-
self.write_word(start_ns);
1150-
self.write_word(end_ns);
1167+
self.write_word(ts_ns);
1168+
1169+
for (fname_ref, value_ref) in frame_refs {
1170+
let arg_header: u64 = 6u64
1171+
| (1u64 << 4)
1172+
| ((fname_ref as u64) << 16)
1173+
| ((value_ref as u64) << 32);
1174+
self.write_word(arg_header);
1175+
}
11511176

11521177
self.event_count += 1;
11531178

11541179
use std::io::Write;
11551180
let _ = self.writer.flush();
11561181
}
11571182

1158-
pub fn write_event(&mut self, reason: &str, frames: &[String]) {
1183+
pub fn write_event(&mut self, category: &str, reason: &str, frames: &[String]) {
11591184
let ts_nanos = self.start_time.elapsed().as_nanos() as u64;
11601185

11611186
// Intern event metadata strings (may emit string records first)
1162-
let category_ref = self.intern_string("side_exit");
1187+
let category_ref = self.intern_string(category);
11631188
let name_ref = self.intern_string(reason);
11641189

11651190
// Intern each frame label and collect refs (max 15 due to 4-bit n_args)

0 commit comments

Comments
 (0)