Skip to content

Commit 52169ef

Browse files
committed
ZJIT: Add Perfetto duration events for compilation phases
1 parent 70e8654 commit 52169ef

5 files changed

Lines changed: 88 additions & 18 deletions

File tree

zjit/src/codegen.rs

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use crate::gc::append_gc_offsets;
1717
use crate::payload::{IseqCodePtrs, IseqStatus, IseqVersion, IseqVersionRef, JITFrame, get_or_create_iseq_payload};
1818
use crate::state::ZJITState;
1919
use crate::stats::{CompileError, exit_counter_for_compile_error, exit_counter_for_unhandled_hir_insn, incr_counter, incr_counter_by, send_fallback_counter, send_fallback_counter_for_method_type, send_fallback_counter_for_super_method_type, send_fallback_counter_ptr_for_opcode, send_without_block_fallback_counter_for_method_type, send_without_block_fallback_counter_for_optimized_method_type};
20-
use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}};
20+
use crate::stats::{counter_ptr, with_time_stat, trace_compile_phase, Counter, Counter::{compile_time_ns, exit_compile_error}};
2121
use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
2222
use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NATIVE_STACK_PTR, Opnd, SP, SideExit, SideExitRecompile, Target, asm_ccall, asm_comment};
2323
use crate::hir::{iseq_to_hir, BlockId, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType};
@@ -200,17 +200,20 @@ fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr, jit_exception: bool)
200200
return Err(CompileError::ExceptionHandler);
201201
}
202202

203-
// Compile ISEQ into High-level IR
204-
let function = crate::stats::with_time_stat(Counter::compile_hir_time_ns, || compile_iseq(iseq).inspect_err(|_| {
205-
incr_counter!(failed_iseq_count);
206-
}))?;
203+
let iseq_name = iseq_get_location(iseq, 0);
204+
trace_compile_phase(&iseq_name, || {
205+
// Compile ISEQ into High-level IR
206+
let function = crate::stats::with_time_stat(Counter::compile_hir_time_ns, || compile_iseq(iseq).inspect_err(|_| {
207+
incr_counter!(failed_iseq_count);
208+
}))?;
207209

208-
// Compile the High-level IR
209-
let IseqCodePtrs { start_ptr, .. } = gen_iseq(cb, iseq, Some(&function)).inspect_err(|err| {
210-
debug!("{err:?}: gen_iseq failed: {}", iseq_get_location(iseq, 0));
211-
})?;
210+
// Compile the High-level IR
211+
let IseqCodePtrs { start_ptr, .. } = gen_iseq(cb, iseq, Some(&function)).inspect_err(|err| {
212+
debug!("{err:?}: gen_iseq failed: {}", iseq_get_location(iseq, 0));
213+
})?;
212214

213-
Ok(start_ptr)
215+
Ok(start_ptr)
216+
})
214217
}
215218

216219
/// Invalidate an ISEQ version and allow it to be recompiled on the next call.
@@ -312,9 +315,14 @@ fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr, function: Option<&Function>) -> R
312315
return Err(CompileError::IseqVersionLimitReached);
313316
}
314317

315-
// Compile the ISEQ
318+
// Compile the ISEQ. When function is None, this is a lazy compile
319+
// from a stub hit — wrap in a trace event covering the full compile.
316320
let mut version = IseqVersion::new(iseq);
317-
let code_ptrs = gen_iseq_body(cb, iseq, version, function);
321+
let code_ptrs = if function.is_none() {
322+
trace_compile_phase(&iseq_get_location(iseq, 0), || gen_iseq_body(cb, iseq, version, function))
323+
} else {
324+
gen_iseq_body(cb, iseq, version, function)
325+
};
318326
match &code_ptrs {
319327
Ok(code_ptrs) => {
320328
unsafe { version.as_mut() }.status = IseqStatus::Compiled(code_ptrs.clone());
@@ -344,7 +352,9 @@ fn gen_iseq_body(cb: &mut CodeBlock, iseq: IseqPtr, mut version: IseqVersionRef,
344352

345353
// Compile the High-level IR
346354
let (iseq_code_ptrs, gc_offsets, iseq_calls) =
347-
crate::stats::with_time_stat(Counter::compile_lir_time_ns, || gen_function(cb, iseq, version, function))?;
355+
trace_compile_phase("codegen", ||
356+
crate::stats::with_time_stat(Counter::compile_lir_time_ns, || gen_function(cb, iseq, version, function))
357+
)?;
348358

349359
// Stub callee ISEQs for JIT-to-JIT calls
350360
for iseq_call in iseq_calls.iter() {
@@ -2955,7 +2965,9 @@ fn compile_iseq(iseq: IseqPtr) -> Result<Function, CompileError> {
29552965
return Err(CompileError::IseqStackTooLarge);
29562966
}
29572967

2958-
let hir = crate::stats::with_time_stat(Counter::compile_hir_build_time_ns, || iseq_to_hir(iseq));
2968+
let hir = trace_compile_phase("build_hir", ||
2969+
crate::stats::with_time_stat(Counter::compile_hir_build_time_ns, || iseq_to_hir(iseq))
2970+
);
29592971
let mut function = match hir {
29602972
Ok(function) => function,
29612973
Err(err) => {
@@ -2964,7 +2976,7 @@ fn compile_iseq(iseq: IseqPtr) -> Result<Function, CompileError> {
29642976
}
29652977
};
29662978
if !get_option!(disable_hir_opt) {
2967-
function.optimize();
2979+
trace_compile_phase("optimize", || function.optimize());
29682980
}
29692981
function.dump_hir();
29702982
Ok(function)

zjit/src/hir.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5930,7 +5930,9 @@ impl Function {
59305930
macro_rules! run_pass {
59315931
($name:ident) => {
59325932
let counter = counter_for!($name);
5933-
crate::stats::with_time_stat(counter, || self.$name());
5933+
crate::stats::trace_compile_phase(stringify!($name), ||
5934+
crate::stats::with_time_stat(counter, || self.$name())
5935+
);
59345936
#[cfg(debug_assertions)] self.assert_validates();
59355937
if should_dump {
59365938
passes.push(

zjit/src/options.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ pub struct Options {
9797
/// Frequency of tracing side exits.
9898
pub trace_side_exits_sample_interval: usize,
9999

100+
/// Trace compilation phases as Perfetto duration events.
101+
pub trace_compiles: bool,
102+
100103
/// Dump code map to /tmp for performance profilers.
101104
pub perf: Option<PerfMap>,
102105

@@ -130,6 +133,7 @@ impl Default for Options {
130133
dump_disasm: None,
131134
trace_side_exits: None,
132135
trace_side_exits_sample_interval: 0,
136+
trace_compiles: false,
133137
perf: None,
134138
allowed_iseqs: None,
135139
log_compiled_iseqs: None,
@@ -161,7 +165,9 @@ pub const ZJIT_OPTIONS: &[(&str, &str)] = &[
161165
("--zjit-trace-exits[=counter]",
162166
"Record source on side-exit. `Counter` picks specific counter."),
163167
("--zjit-trace-exits-sample-rate=num",
164-
"Frequency at which to record side exits. Must be `usize`.")
168+
"Frequency at which to record side exits. Must be `usize`."),
169+
("--zjit-trace-compiles",
170+
"Record compilation phases as Perfetto trace events.")
165171
];
166172

167173
#[derive(Copy, Clone, Debug)]
@@ -388,6 +394,8 @@ fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
388394
options.trace_side_exits_sample_interval = sample_interval.parse::<usize>().ok()?;
389395
}
390396

397+
("trace-compiles", "") => options.trace_compiles = true,
398+
391399
("debug", "") => options.debug = true,
392400

393401
("disable", "") => options.disable = true,

zjit/src/state.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ impl ZJITState {
128128
let exit_trampoline = gen_exit_trampoline(&mut cb).unwrap();
129129
let function_stub_hit_trampoline = gen_function_stub_hit_trampoline(&mut cb).unwrap();
130130

131-
let perfetto_tracer = if get_option!(trace_side_exits).is_some() {
131+
let perfetto_tracer = if get_option!(trace_side_exits).is_some() || get_option!(trace_compiles) {
132132
Some(PerfettoTracer::new())
133133
} else {
134134
None

zjit/src/stats.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,23 @@ pub fn zjit_alloc_bytes() -> usize {
993993
jit::GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)
994994
}
995995

996+
/// Record a Perfetto duration-complete event spanning the execution of `func`.
997+
/// Nested calls produce nested events on the same thread in the trace viewer.
998+
pub fn trace_compile_phase<F, R>(name: &str, func: F) -> R where F: FnOnce() -> R {
999+
if !get_option!(trace_compiles) {
1000+
return func();
1001+
}
1002+
let start_ns = ZJITState::get_tracer().map(|t| t.elapsed_ns());
1003+
let result = func();
1004+
if let Some(start_ns) = start_ns {
1005+
if let Some(tracer) = ZJITState::get_tracer() {
1006+
let end_ns = tracer.elapsed_ns();
1007+
tracer.write_duration_complete("compile", name, start_ns, end_ns);
1008+
}
1009+
}
1010+
result
1011+
}
1012+
9961013
/// Fuchsia Trace Format (FXT) binary writer for --zjit-trace-exits.
9971014
/// Produces .fxt files that can be opened directly in Perfetto UI.
9981015
/// Uses the string table for deduplication of repeated reason/frame strings.
@@ -1060,6 +1077,7 @@ impl PerfettoTracer {
10601077

10611078
// Pre-intern common strings
10621079
tracer.intern_string("side_exit");
1080+
tracer.intern_string("compile");
10631081
// Pre-intern argument names "0".."14" for per-frame arguments
10641082
for i in 0..15u32 {
10651083
tracer.intern_string(&i.to_string());
@@ -1107,6 +1125,36 @@ impl PerfettoTracer {
11071125
idx
11081126
}
11091127

1128+
/// Return nanoseconds elapsed since tracer creation.
1129+
pub fn elapsed_ns(&self) -> u64 {
1130+
self.start_time.elapsed().as_nanos() as u64
1131+
}
1132+
1133+
/// Write a Duration Complete event (FXT event type 4).
1134+
/// Records an event spanning from `start_ns` to `end_ns` (both in ticks
1135+
/// relative to `self.start_time`).
1136+
pub fn write_duration_complete(&mut self, category: &str, name: &str, start_ns: u64, end_ns: u64) {
1137+
let category_ref = self.intern_string(category);
1138+
let name_ref = self.intern_string(name);
1139+
1140+
// Duration Complete: header + start_ts + end_ts = 3 words, no args
1141+
let event_words: u64 = 3;
1142+
let header: u64 = 4u64 // record type = event
1143+
| (event_words << 4) // record size
1144+
| (4u64 << 16) // event type = duration complete
1145+
| (1u64 << 24) // thread_ref = 1
1146+
| ((category_ref as u64) << 32)
1147+
| ((name_ref as u64) << 48);
1148+
self.write_word(header);
1149+
self.write_word(start_ns);
1150+
self.write_word(end_ns);
1151+
1152+
self.event_count += 1;
1153+
1154+
use std::io::Write;
1155+
let _ = self.writer.flush();
1156+
}
1157+
11101158
pub fn write_event(&mut self, reason: &str, frames: &[String]) {
11111159
let ts_nanos = self.start_time.elapsed().as_nanos() as u64;
11121160

0 commit comments

Comments
 (0)