Skip to content

Commit ecf7cd6

Browse files
committed
ZJIT: Add Perfetto trace sub-phases for LIR codegen and stubs
1 parent e03b80e commit ecf7cd6

5 files changed

Lines changed: 85 additions & 52 deletions

File tree

zjit/src/backend/arm64/mod.rs

Lines changed: 29 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ use crate::codegen::split_patch_point;
44
use crate::cruby::*;
55
use crate::backend::lir::*;
66
use crate::options::asm_dump;
7-
use crate::stats::CompileError;
7+
use crate::stats::{CompileError, trace_compile_phase};
88
use crate::virtualmem::CodePtr;
99
use crate::cast::*;
1010

@@ -1610,14 +1610,15 @@ impl Assembler {
16101610
let use_scratch_reg = !self.accept_scratch_reg;
16111611
asm_dump!(self, init);
16121612

1613-
let mut asm = self.arm64_split();
1613+
let mut asm = trace_compile_phase("split", || self.arm64_split());
16141614

16151615
asm_dump!(asm, split);
16161616

1617-
asm.number_instructions(0);
1617+
let (intervals, assignments, num_stack_slots) = trace_compile_phase("regalloc", || {
1618+
trace_compile_phase("number_instructions", || asm.number_instructions(0));
16181619

1619-
let live_in = asm.analyze_liveness();
1620-
let intervals = asm.build_intervals(live_in);
1620+
let live_in = trace_compile_phase("analyze_liveness", || asm.analyze_liveness());
1621+
let intervals = trace_compile_phase("build_intervals", || asm.build_intervals(live_in));
16211622

16221623
// Dump live intervals if requested
16231624
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
@@ -1627,7 +1628,9 @@ impl Assembler {
16271628
}
16281629

16291630
let preferred_registers = asm.preferred_register_assignments(&intervals);
1630-
let (assignments, num_stack_slots) = asm.linear_scan(intervals.clone(), regs.len(), &preferred_registers);
1631+
let (assignments, num_stack_slots) = trace_compile_phase("linear_scan", || asm.linear_scan(intervals.clone(), regs.len(), &preferred_registers));
1632+
(intervals, assignments, num_stack_slots)
1633+
});
16311634

16321635
let total_stack_slots = asm.stack_base_idx + num_stack_slots;
16331636
if total_stack_slots > Self::MAX_FRAME_STACK_SLOTS {
@@ -1665,36 +1668,41 @@ impl Assembler {
16651668
}
16661669
}
16671670

1668-
asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS);
1669-
asm.resolve_ssa(&intervals, &assignments);
1671+
trace_compile_phase("resolve_ssa", || {
1672+
asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS);
1673+
asm.resolve_ssa(&intervals, &assignments);
1674+
});
16701675
asm_dump!(asm, alloc_regs);
16711676

16721677
// We are moved out of SSA after resolve_ssa
16731678

16741679
// We put compile_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits.
16751680
// Exit code is compiled into a separate list of instructions that we append
16761681
// to the last reachable block before scratch_split, so it gets linearized and split.
1677-
let exit_insns = asm.compile_exits();
1678-
asm_dump!(asm, compile_exits);
1679-
1680-
// Append exit instructions to the last reachable block so they are
1681-
// included in linearize_instructions and processed by scratch_split.
1682-
if let Some(&last_block) = asm.block_order().last() {
1683-
for insn in exit_insns {
1684-
asm.basic_blocks[last_block.0].insns.push(insn);
1685-
asm.basic_blocks[last_block.0].insn_ids.push(None);
1682+
trace_compile_phase("compile_exits", || {
1683+
let exit_insns = asm.compile_exits();
1684+
1685+
// Append exit instructions to the last reachable block so they are
1686+
// included in linearize_instructions and processed by scratch_split.
1687+
if let Some(&last_block) = asm.block_order().last() {
1688+
for insn in exit_insns {
1689+
asm.basic_blocks[last_block.0].insns.push(insn);
1690+
asm.basic_blocks[last_block.0].insn_ids.push(None);
1691+
}
16861692
}
1687-
}
1693+
});
1694+
asm_dump!(asm, compile_exits);
16881695

16891696
if use_scratch_reg {
1690-
asm = asm.arm64_scratch_split();
1697+
asm = trace_compile_phase("scratch_split", || asm.arm64_scratch_split());
16911698
asm_dump!(asm, scratch_split);
16921699
} else {
16931700
// For trampolines that use scratch registers, resolve ParallelMov without scratch_reg.
1694-
asm = asm.resolve_parallel_mov_pass();
1701+
asm = trace_compile_phase("resolve_parallel_mov", || asm.resolve_parallel_mov_pass());
16951702
asm_dump!(asm, resolve_parallel_mov);
16961703
}
16971704

1705+
trace_compile_phase("emit", || {
16981706
// Create label instances in the code block
16991707
for (idx, name) in asm.label_names.iter().enumerate() {
17001708
let label = cb.new_label(name.to_string());
@@ -1711,6 +1719,7 @@ impl Assembler {
17111719
unsafe { rb_jit_icache_invalidate(start_ptr.raw_ptr(cb) as _, cb.get_write_ptr().raw_ptr(cb) as _) };
17121720

17131721
Ok((start_ptr, gc_offsets))
1722+
})
17141723
}
17151724
}
17161725

zjit/src/backend/x86_64/mod.rs

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::mem;
33
use crate::asm::*;
44
use crate::asm::x86_64::*;
55
use crate::codegen::split_patch_point;
6-
use crate::stats::CompileError;
6+
use crate::stats::{CompileError, trace_compile_phase};
77
use crate::virtualmem::CodePtr;
88
use crate::cruby::*;
99
use crate::backend::lir::*;
@@ -1143,14 +1143,15 @@ impl Assembler {
11431143
let use_scratch_regs = !self.accept_scratch_reg;
11441144
asm_dump!(self, init);
11451145

1146-
let mut asm = self.x86_split();
1146+
let mut asm = trace_compile_phase("split", || self.x86_split());
11471147

11481148
asm_dump!(asm, split);
11491149

1150-
asm.number_instructions(0);
1150+
let (intervals, assignments, num_stack_slots) = trace_compile_phase("regalloc", || {
1151+
trace_compile_phase("number_instructions", || asm.number_instructions(0));
11511152

1152-
let live_in = asm.analyze_liveness();
1153-
let intervals = asm.build_intervals(live_in);
1153+
let live_in = trace_compile_phase("analyze_liveness", || asm.analyze_liveness());
1154+
let intervals = trace_compile_phase("build_intervals", || asm.build_intervals(live_in));
11541155

11551156
// Dump live intervals if requested
11561157
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
@@ -1160,7 +1161,9 @@ impl Assembler {
11601161
}
11611162

11621163
let preferred_registers = asm.preferred_register_assignments(&intervals);
1163-
let (assignments, num_stack_slots) = asm.linear_scan(intervals.clone(), regs.len(), &preferred_registers);
1164+
let (assignments, num_stack_slots) = trace_compile_phase("linear_scan", || asm.linear_scan(intervals.clone(), regs.len(), &preferred_registers));
1165+
(intervals, assignments, num_stack_slots)
1166+
});
11641167

11651168
let total_stack_slots = asm.stack_base_idx + num_stack_slots;
11661169
if total_stack_slots > Self::MAX_FRAME_STACK_SLOTS {
@@ -1198,36 +1201,41 @@ impl Assembler {
11981201
}
11991202
}
12001203

1201-
asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS);
1202-
asm.resolve_ssa(&intervals, &assignments);
1204+
trace_compile_phase("resolve_ssa", || {
1205+
asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS);
1206+
asm.resolve_ssa(&intervals, &assignments);
1207+
});
12031208
asm_dump!(asm, alloc_regs);
12041209

12051210
// We are moved out of SSA after resolve_ssa
12061211

12071212
// We put compile_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits.
12081213
// Exit code is compiled into a separate list of instructions that we append
12091214
// to the last reachable block before scratch_split, so it gets linearized and split.
1210-
let exit_insns = asm.compile_exits();
1211-
asm_dump!(asm, compile_exits);
1212-
1213-
// Append exit instructions to the last reachable block so they are
1214-
// included in linearize_instructions and processed by scratch_split.
1215-
if let Some(&last_block) = asm.block_order().last() {
1216-
for insn in exit_insns {
1217-
asm.basic_blocks[last_block.0].insns.push(insn);
1218-
asm.basic_blocks[last_block.0].insn_ids.push(None);
1215+
trace_compile_phase("compile_exits", || {
1216+
let exit_insns = asm.compile_exits();
1217+
1218+
// Append exit instructions to the last reachable block so they are
1219+
// included in linearize_instructions and processed by scratch_split.
1220+
if let Some(&last_block) = asm.block_order().last() {
1221+
for insn in exit_insns {
1222+
asm.basic_blocks[last_block.0].insns.push(insn);
1223+
asm.basic_blocks[last_block.0].insn_ids.push(None);
1224+
}
12191225
}
1220-
}
1226+
});
1227+
asm_dump!(asm, compile_exits);
12211228

12221229
if use_scratch_regs {
1223-
asm = asm.x86_scratch_split();
1230+
asm = trace_compile_phase("scratch_split", || asm.x86_scratch_split());
12241231
asm_dump!(asm, scratch_split);
12251232
} else {
12261233
// For trampolines that use scratch registers, resolve ParallelMov without scratch_reg.
1227-
asm = asm.resolve_parallel_mov_pass();
1234+
asm = trace_compile_phase("resolve_parallel_mov", || asm.resolve_parallel_mov_pass());
12281235
asm_dump!(asm, resolve_parallel_mov);
12291236
}
12301237

1238+
trace_compile_phase("emit", || {
12311239
// Create label instances in the code block
12321240
for (idx, name) in asm.label_names.iter().enumerate() {
12331241
let label = cb.new_label(name.to_string());
@@ -1240,6 +1248,7 @@ impl Assembler {
12401248

12411249
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
12421250
Ok((start_ptr, gc_offsets))
1251+
})
12431252
}
12441253
}
12451254

zjit/src/codegen.rs

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ pub fn invalidate_iseq_version(cb: &mut CodeBlock, iseq: IseqPtr, version: &mut
242242

243243
/// Stub a branch for a JIT-to-JIT call
244244
pub fn gen_iseq_call(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result<(), CompileError> {
245+
trace_compile_phase("compile_stub", || {
245246
// Compile a function stub
246247
let stub_ptr = gen_function_stub(cb, iseq_call.clone()).inspect_err(|err| {
247248
debug!("{err:?}: gen_function_stub failed: {}", iseq_get_location(iseq_call.iseq.get(), 0));
@@ -255,6 +256,7 @@ pub fn gen_iseq_call(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result<(),
255256
asm.ccall_into(C_RET_OPND, stub_addr, vec![]);
256257
});
257258
Ok(())
259+
})
258260
}
259261

260262
/// Write an entry to the perf map in /tmp
@@ -352,14 +354,20 @@ fn gen_iseq_body(cb: &mut CodeBlock, iseq: IseqPtr, mut version: IseqVersionRef,
352354

353355
// Compile the High-level IR
354356
let (iseq_code_ptrs, gc_offsets, iseq_calls) =
355-
trace_compile_phase("codegen", ||
356-
crate::stats::with_time_stat(Counter::compile_lir_time_ns, || gen_function(cb, iseq, version, function))
357-
)?;
357+
trace_compile_phase("codegen", || {
358+
let (iseq_code_ptrs, gc_offsets, iseq_calls) =
359+
crate::stats::with_time_stat(Counter::compile_lir_time_ns, || gen_function(cb, iseq, version, function))?;
360+
361+
// Stub callee ISEQs for JIT-to-JIT calls
362+
trace_compile_phase("generate_jit_jit_stubs", || {
363+
for iseq_call in iseq_calls.iter() {
364+
gen_iseq_call(cb, iseq_call)?;
365+
}
366+
Ok::<(), CompileError>(())
367+
})?;
358368

359-
// Stub callee ISEQs for JIT-to-JIT calls
360-
for iseq_call in iseq_calls.iter() {
361-
gen_iseq_call(cb, iseq_call)?;
362-
}
369+
Ok((iseq_code_ptrs, gc_offsets, iseq_calls))
370+
})?;
363371

364372
// Prepare for GC
365373
unsafe { version.as_mut() }.outgoing.extend(iseq_calls);
@@ -369,6 +377,7 @@ fn gen_iseq_body(cb: &mut CodeBlock, iseq: IseqPtr, mut version: IseqVersionRef,
369377

370378
/// Compile a function
371379
fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, function: &Function) -> Result<(IseqCodePtrs, Vec<CodePtr>, Vec<IseqCallRef>), CompileError> {
380+
let (mut jit, asm) = trace_compile_phase("lowering", || {
372381
let num_spilled_params = max_num_params(function).saturating_sub(ALLOC_REGS.len());
373382
let mut jit = JITState::new(iseq, version, function.num_insns(), function.num_blocks());
374383
let mut asm = Assembler::new_with_stack_slots(num_spilled_params);
@@ -546,6 +555,9 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func
546555
// Validate CFG invariants after HIR to LIR lowering
547556
asm.validate_jump_positions();
548557

558+
(jit, asm)
559+
});
560+
549561
// Generate code if everything can be compiled
550562
let result = asm.compile(cb);
551563
if let Ok((start_ptr, _)) = result {
@@ -3270,9 +3282,11 @@ fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result
32703282
let jit_entry_ptr = jit_entry_ptrs[iseq_call.jit_entry_idx.to_usize()];
32713283
let code_addr = jit_entry_ptr.raw_ptr(cb);
32723284
let iseq = iseq_call.iseq.get();
3273-
iseq_call.regenerate(cb, |asm| {
3274-
asm_comment!(asm, "call compiled function: {}", iseq_get_location(iseq, 0));
3275-
asm.ccall_into(C_RET_OPND, code_addr, vec![]);
3285+
trace_compile_phase("compile_stub", || {
3286+
iseq_call.regenerate(cb, |asm| {
3287+
asm_comment!(asm, "call compiled function: {}", iseq_get_location(iseq, 0));
3288+
asm.ccall_into(C_RET_OPND, code_addr, vec![]);
3289+
});
32763290
});
32773291

32783292
Ok(jit_entry_ptr)

zjit/src/hir.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5933,7 +5933,7 @@ impl Function {
59335933
crate::stats::trace_compile_phase(stringify!($name), ||
59345934
crate::stats::with_time_stat(counter, || self.$name())
59355935
);
5936-
#[cfg(debug_assertions)] self.assert_validates();
5936+
#[cfg(debug_assertions)] crate::stats::trace_compile_phase("validate", || self.assert_validates());
59375937
if should_dump {
59385938
passes.push(
59395939
self.to_iongraph_pass(stringify!($name))
@@ -8414,7 +8414,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
84148414
}
84158415

84168416
fun.profiles = Some(profiles);
8417-
if let Err(err) = fun.validate() {
8417+
if let Err(err) = crate::stats::trace_compile_phase("validate", || fun.validate()) {
84188418
debug!("ZJIT: {err:?}: Initial HIR:\n{}", FunctionPrinter::without_snapshot(&fun));
84198419
return Err(ParseError::Validation(err));
84208420
}

zjit/src/state.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ impl ZJITState {
300300

301301
/// Get a mutable reference to the Perfetto tracer
302302
pub fn get_tracer() -> Option<&'static mut PerfettoTracer> {
303+
if !ZJITState::has_instance() { return None; }
303304
ZJITState::get_instance().perfetto_tracer.as_mut()
304305
}
305306
}

0 commit comments

Comments
 (0)