Skip to content

Commit 7687c4d

Browse files
authored
ZJIT: Trace compile events and invalidation events (ruby#16666)
Trace compiles with `--zjit-trace-compiles` and invalidation events with `--zjit-trace-invalidation`. See [sample trace](https://ui.perfetto.dev/#!/?url=https://bernsteinbear.com/tmp-perfetto/perfetto-24803.fxt) (the link may take a moment to become available while the file finishes uploading). https://github.com/user-attachments/assets/2ed578e9-4e21-4051-8e98-777ff082bef6
1 parent 8dd7300 commit 7687c4d

File tree

8 files changed

+532
-334
lines changed

8 files changed

+532
-334
lines changed

zjit/src/backend/arm64/mod.rs

Lines changed: 79 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::codegen::split_patch_point;
44
use crate::cruby::*;
55
use crate::backend::lir::*;
66
use crate::options::asm_dump;
7-
use crate::stats::CompileError;
7+
use crate::stats::{CompileError, trace_compile_phase};
88
use crate::virtualmem::CodePtr;
99
use crate::cast::*;
1010

@@ -1610,107 +1610,119 @@ impl Assembler {
16101610
let use_scratch_reg = !self.accept_scratch_reg;
16111611
asm_dump!(self, init);
16121612

1613-
let mut asm = self.arm64_split();
1613+
let mut asm = trace_compile_phase("split", || self.arm64_split());
16141614

16151615
asm_dump!(asm, split);
16161616

1617-
asm.number_instructions(0);
1617+
trace_compile_phase("regalloc", || {
1618+
trace_compile_phase("number_instructions", || asm.number_instructions(0));
16181619

1619-
let live_in = asm.analyze_liveness();
1620-
let intervals = asm.build_intervals(live_in);
1620+
let live_in = trace_compile_phase("analyze_liveness", || asm.analyze_liveness());
1621+
let intervals = trace_compile_phase("build_intervals", || asm.build_intervals(live_in));
16211622

1622-
// Dump live intervals if requested
1623-
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
1624-
if dump_lirs.contains(&crate::options::DumpLIR::live_intervals) {
1625-
println!("LIR live_intervals:\n{}", crate::backend::lir::debug_intervals(&asm, &intervals));
1623+
// Dump live intervals if requested
1624+
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
1625+
if dump_lirs.contains(&crate::options::DumpLIR::live_intervals) {
1626+
println!("LIR live_intervals:\n{}", crate::backend::lir::debug_intervals(&asm, &intervals));
1627+
}
16261628
}
1627-
}
16281629

1629-
let preferred_registers = asm.preferred_register_assignments(&intervals);
1630-
let (assignments, num_stack_slots) = asm.linear_scan(intervals.clone(), regs.len(), &preferred_registers);
1630+
let preferred_registers = trace_compile_phase("preferred_registers", || asm.preferred_register_assignments(&intervals));
1631+
let (assignments, num_stack_slots) = trace_compile_phase("linear_scan", || asm.linear_scan(intervals.clone(), regs.len(), &preferred_registers));
16311632

1632-
let total_stack_slots = asm.stack_base_idx + num_stack_slots;
1633-
if total_stack_slots > Self::MAX_FRAME_STACK_SLOTS {
1634-
return Err(CompileError::NativeStackTooLarge);
1635-
}
1633+
let total_stack_slots = asm.stack_base_idx + num_stack_slots;
1634+
if total_stack_slots > Self::MAX_FRAME_STACK_SLOTS {
1635+
return Err(CompileError::NativeStackTooLarge);
1636+
}
16361637

1637-
// Dump vreg-to-physical-register mapping if requested
1638-
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
1639-
if dump_lirs.contains(&crate::options::DumpLIR::alloc_regs) {
1640-
println!("LIR live_intervals:\n{}", crate::backend::lir::debug_intervals(&asm, &intervals));
1641-
1642-
println!("VReg assignments:");
1643-
for (i, alloc) in assignments.iter().enumerate() {
1644-
if let Some(alloc) = alloc {
1645-
let range = &intervals[i].range;
1646-
let alloc_str = match alloc {
1647-
Allocation::Reg(n) => format!("{}", regs[*n]),
1648-
Allocation::Fixed(reg) => format!("{}", reg),
1649-
Allocation::Stack(n) => format!("Stack[{}]", n),
1650-
};
1651-
println!(" v{} => {} (range: {:?}..{:?})", i, alloc_str, range.start, range.end);
1638+
// Dump vreg-to-physical-register mapping if requested
1639+
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
1640+
if dump_lirs.contains(&crate::options::DumpLIR::alloc_regs) {
1641+
println!("LIR live_intervals:\n{}", crate::backend::lir::debug_intervals(&asm, &intervals));
1642+
1643+
println!("VReg assignments:");
1644+
for (i, alloc) in assignments.iter().enumerate() {
1645+
if let Some(alloc) = alloc {
1646+
let range = &intervals[i].range;
1647+
let alloc_str = match alloc {
1648+
Allocation::Reg(n) => format!("{}", regs[*n]),
1649+
Allocation::Fixed(reg) => format!("{}", reg),
1650+
Allocation::Stack(n) => format!("Stack[{}]", n),
1651+
};
1652+
println!(" v{} => {} (range: {:?}..{:?})", i, alloc_str, range.start, range.end);
1653+
}
16521654
}
16531655
}
16541656
}
1655-
}
16561657

1657-
// Update FrameSetup slot_count to account for:
1658-
// 1) stack slots reserved for block params (stack_base_idx), and
1659-
// 2) register allocator spills (num_stack_slots).
1660-
for block in asm.basic_blocks.iter_mut() {
1661-
for insn in block.insns.iter_mut() {
1662-
if let Insn::FrameSetup { slot_count, .. } = insn {
1663-
*slot_count = total_stack_slots;
1658+
// Update FrameSetup slot_count to account for:
1659+
// 1) stack slots reserved for block params (stack_base_idx), and
1660+
// 2) register allocator spills (num_stack_slots).
1661+
trace_compile_phase("count_stack_slots", || {
1662+
for block in asm.basic_blocks.iter_mut() {
1663+
for insn in block.insns.iter_mut() {
1664+
if let Insn::FrameSetup { slot_count, .. } = insn {
1665+
*slot_count = total_stack_slots;
1666+
}
1667+
}
16641668
}
1665-
}
1666-
}
1669+
});
1670+
1671+
trace_compile_phase("resolve_ssa", || {
1672+
asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS);
1673+
asm.resolve_ssa(&intervals, &assignments);
1674+
});
16671675

1668-
asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS);
1669-
asm.resolve_ssa(&intervals, &assignments);
1676+
Ok(())
1677+
})?;
16701678
asm_dump!(asm, alloc_regs);
16711679

16721680
// We are moved out of SSA after resolve_ssa
16731681

16741682
// We put compile_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits.
16751683
// Exit code is compiled into a separate list of instructions that we append
16761684
// to the last reachable block before scratch_split, so it gets linearized and split.
1677-
let exit_insns = asm.compile_exits();
1678-
asm_dump!(asm, compile_exits);
1679-
1680-
// Append exit instructions to the last reachable block so they are
1681-
// included in linearize_instructions and processed by scratch_split.
1682-
if let Some(&last_block) = asm.block_order().last() {
1683-
for insn in exit_insns {
1684-
asm.basic_blocks[last_block.0].insns.push(insn);
1685-
asm.basic_blocks[last_block.0].insn_ids.push(None);
1685+
trace_compile_phase("compile_exits", || {
1686+
let exit_insns = asm.compile_exits();
1687+
1688+
// Append exit instructions to the last reachable block so they are
1689+
// included in linearize_instructions and processed by scratch_split.
1690+
if let Some(&last_block) = asm.block_order().last() {
1691+
for insn in exit_insns {
1692+
asm.basic_blocks[last_block.0].insns.push(insn);
1693+
asm.basic_blocks[last_block.0].insn_ids.push(None);
1694+
}
16861695
}
1687-
}
1696+
});
1697+
asm_dump!(asm, compile_exits);
16881698

16891699
if use_scratch_reg {
1690-
asm = asm.arm64_scratch_split();
1700+
asm = trace_compile_phase("scratch_split", || asm.arm64_scratch_split());
16911701
asm_dump!(asm, scratch_split);
16921702
} else {
16931703
// For trampolines that use scratch registers, resolve ParallelMov without scratch_reg.
1694-
asm = asm.resolve_parallel_mov_pass();
1704+
asm = trace_compile_phase("resolve_parallel_mov", || asm.resolve_parallel_mov_pass());
16951705
asm_dump!(asm, resolve_parallel_mov);
16961706
}
16971707

1698-
// Create label instances in the code block
1699-
for (idx, name) in asm.label_names.iter().enumerate() {
1700-
let label = cb.new_label(name.to_string());
1701-
assert_eq!(label, Label(idx));
1702-
}
1708+
trace_compile_phase("emit", || {
1709+
// Create label instances in the code block
1710+
for (idx, name) in asm.label_names.iter().enumerate() {
1711+
let label = cb.new_label(name.to_string());
1712+
assert_eq!(label, Label(idx));
1713+
}
17031714

1704-
let start_ptr = cb.get_write_ptr();
1705-
let gc_offsets = asm.arm64_emit(cb).inspect_err(|_| cb.clear_labels())?;
1706-
assert!(!cb.has_dropped_bytes(), "emit should not drop bytes without error");
1715+
let start_ptr = cb.get_write_ptr();
1716+
let gc_offsets = asm.arm64_emit(cb).inspect_err(|_| cb.clear_labels())?;
1717+
assert!(!cb.has_dropped_bytes(), "emit should not drop bytes without error");
17071718

1708-
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
1719+
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
17091720

1710-
// Invalidate icache for newly written out region so we don't run stale code.
1711-
unsafe { rb_jit_icache_invalidate(start_ptr.raw_ptr(cb) as _, cb.get_write_ptr().raw_ptr(cb) as _) };
1721+
// Invalidate icache for newly written out region so we don't run stale code.
1722+
unsafe { rb_jit_icache_invalidate(start_ptr.raw_ptr(cb) as _, cb.get_write_ptr().raw_ptr(cb) as _) };
17121723

1713-
Ok((start_ptr, gc_offsets))
1724+
Ok((start_ptr, gc_offsets))
1725+
})
17141726
}
17151727
}
17161728

zjit/src/backend/x86_64/mod.rs

Lines changed: 77 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::mem;
33
use crate::asm::*;
44
use crate::asm::x86_64::*;
55
use crate::codegen::split_patch_point;
6-
use crate::stats::CompileError;
6+
use crate::stats::{CompileError, trace_compile_phase};
77
use crate::virtualmem::CodePtr;
88
use crate::cruby::*;
99
use crate::backend::lir::*;
@@ -1143,103 +1143,115 @@ impl Assembler {
11431143
let use_scratch_regs = !self.accept_scratch_reg;
11441144
asm_dump!(self, init);
11451145

1146-
let mut asm = self.x86_split();
1146+
let mut asm = trace_compile_phase("split", || self.x86_split());
11471147

11481148
asm_dump!(asm, split);
11491149

1150-
asm.number_instructions(0);
1150+
trace_compile_phase("regalloc", || {
1151+
trace_compile_phase("number_instructions", || asm.number_instructions(0));
11511152

1152-
let live_in = asm.analyze_liveness();
1153-
let intervals = asm.build_intervals(live_in);
1153+
let live_in = trace_compile_phase("analyze_liveness", || asm.analyze_liveness());
1154+
let intervals = trace_compile_phase("build_intervals", || asm.build_intervals(live_in));
11541155

1155-
// Dump live intervals if requested
1156-
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
1157-
if dump_lirs.contains(&crate::options::DumpLIR::live_intervals) {
1158-
println!("LIR live_intervals:\n{}", crate::backend::lir::debug_intervals(&asm, &intervals));
1156+
// Dump live intervals if requested
1157+
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
1158+
if dump_lirs.contains(&crate::options::DumpLIR::live_intervals) {
1159+
println!("LIR live_intervals:\n{}", crate::backend::lir::debug_intervals(&asm, &intervals));
1160+
}
11591161
}
1160-
}
11611162

1162-
let preferred_registers = asm.preferred_register_assignments(&intervals);
1163-
let (assignments, num_stack_slots) = asm.linear_scan(intervals.clone(), regs.len(), &preferred_registers);
1163+
let preferred_registers = trace_compile_phase("preferred_registers", || asm.preferred_register_assignments(&intervals));
1164+
let (assignments, num_stack_slots) = trace_compile_phase("linear_scan", || asm.linear_scan(intervals.clone(), regs.len(), &preferred_registers));
11641165

1165-
let total_stack_slots = asm.stack_base_idx + num_stack_slots;
1166-
if total_stack_slots > Self::MAX_FRAME_STACK_SLOTS {
1167-
return Err(CompileError::NativeStackTooLarge);
1168-
}
1166+
let total_stack_slots = asm.stack_base_idx + num_stack_slots;
1167+
if total_stack_slots > Self::MAX_FRAME_STACK_SLOTS {
1168+
return Err(CompileError::NativeStackTooLarge);
1169+
}
11691170

1170-
// Dump vreg-to-physical-register mapping if requested
1171-
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
1172-
if dump_lirs.contains(&crate::options::DumpLIR::alloc_regs) {
1173-
println!("LIR live_intervals:\n{}", crate::backend::lir::debug_intervals(&asm, &intervals));
1174-
1175-
println!("VReg assignments:");
1176-
for (i, alloc) in assignments.iter().enumerate() {
1177-
if let Some(alloc) = alloc {
1178-
let range = &intervals[i].range;
1179-
let alloc_str = match alloc {
1180-
Allocation::Reg(n) => format!("{}", regs[*n]),
1181-
Allocation::Fixed(reg) => format!("{}", reg),
1182-
Allocation::Stack(n) => format!("Stack[{}]", n),
1183-
};
1184-
println!(" v{} => {} (range: {:?}..{:?})", i, alloc_str, range.start, range.end);
1171+
// Dump vreg-to-physical-register mapping if requested
1172+
if let Some(crate::options::Options { dump_lir: Some(dump_lirs), .. }) = unsafe { crate::options::OPTIONS.as_ref() } {
1173+
if dump_lirs.contains(&crate::options::DumpLIR::alloc_regs) {
1174+
println!("LIR live_intervals:\n{}", crate::backend::lir::debug_intervals(&asm, &intervals));
1175+
1176+
println!("VReg assignments:");
1177+
for (i, alloc) in assignments.iter().enumerate() {
1178+
if let Some(alloc) = alloc {
1179+
let range = &intervals[i].range;
1180+
let alloc_str = match alloc {
1181+
Allocation::Reg(n) => format!("{}", regs[*n]),
1182+
Allocation::Fixed(reg) => format!("{}", reg),
1183+
Allocation::Stack(n) => format!("Stack[{}]", n),
1184+
};
1185+
println!(" v{} => {} (range: {:?}..{:?})", i, alloc_str, range.start, range.end);
1186+
}
11851187
}
11861188
}
11871189
}
1188-
}
11891190

1190-
// Update FrameSetup slot_count to account for:
1191-
// 1) stack slots reserved for block params (stack_base_idx), and
1192-
// 2) register allocator spills (num_stack_slots).
1193-
for block in asm.basic_blocks.iter_mut() {
1194-
for insn in block.insns.iter_mut() {
1195-
if let Insn::FrameSetup { slot_count, .. } = insn {
1196-
*slot_count = total_stack_slots;
1191+
// Update FrameSetup slot_count to account for:
1192+
// 1) stack slots reserved for block params (stack_base_idx), and
1193+
// 2) register allocator spills (num_stack_slots).
1194+
trace_compile_phase("count_stack_slots", || {
1195+
for block in asm.basic_blocks.iter_mut() {
1196+
for insn in block.insns.iter_mut() {
1197+
if let Insn::FrameSetup { slot_count, .. } = insn {
1198+
*slot_count = total_stack_slots;
1199+
}
1200+
}
11971201
}
1198-
}
1199-
}
1202+
});
12001203

1201-
asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS);
1202-
asm.resolve_ssa(&intervals, &assignments);
1204+
trace_compile_phase("resolve_ssa", || {
1205+
asm.handle_caller_saved_regs(&intervals, &assignments, &C_ARG_REGREGS);
1206+
asm.resolve_ssa(&intervals, &assignments);
1207+
});
1208+
1209+
Ok(())
1210+
})?;
12031211
asm_dump!(asm, alloc_regs);
12041212

12051213
// We are moved out of SSA after resolve_ssa
12061214

12071215
// We put compile_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits.
12081216
// Exit code is compiled into a separate list of instructions that we append
12091217
// to the last reachable block before scratch_split, so it gets linearized and split.
1210-
let exit_insns = asm.compile_exits();
1211-
asm_dump!(asm, compile_exits);
1212-
1213-
// Append exit instructions to the last reachable block so they are
1214-
// included in linearize_instructions and processed by scratch_split.
1215-
if let Some(&last_block) = asm.block_order().last() {
1216-
for insn in exit_insns {
1217-
asm.basic_blocks[last_block.0].insns.push(insn);
1218-
asm.basic_blocks[last_block.0].insn_ids.push(None);
1218+
trace_compile_phase("compile_exits", || {
1219+
let exit_insns = asm.compile_exits();
1220+
1221+
// Append exit instructions to the last reachable block so they are
1222+
// included in linearize_instructions and processed by scratch_split.
1223+
if let Some(&last_block) = asm.block_order().last() {
1224+
for insn in exit_insns {
1225+
asm.basic_blocks[last_block.0].insns.push(insn);
1226+
asm.basic_blocks[last_block.0].insn_ids.push(None);
1227+
}
12191228
}
1220-
}
1229+
});
1230+
asm_dump!(asm, compile_exits);
12211231

12221232
if use_scratch_regs {
1223-
asm = asm.x86_scratch_split();
1233+
asm = trace_compile_phase("scratch_split", || asm.x86_scratch_split());
12241234
asm_dump!(asm, scratch_split);
12251235
} else {
12261236
// For trampolines that use scratch registers, resolve ParallelMov without scratch_reg.
1227-
asm = asm.resolve_parallel_mov_pass();
1237+
asm = trace_compile_phase("resolve_parallel_mov", || asm.resolve_parallel_mov_pass());
12281238
asm_dump!(asm, resolve_parallel_mov);
12291239
}
12301240

1231-
// Create label instances in the code block
1232-
for (idx, name) in asm.label_names.iter().enumerate() {
1233-
let label = cb.new_label(name.to_string());
1234-
assert_eq!(label, Label(idx));
1235-
}
1241+
trace_compile_phase("emit", || {
1242+
// Create label instances in the code block
1243+
for (idx, name) in asm.label_names.iter().enumerate() {
1244+
let label = cb.new_label(name.to_string());
1245+
assert_eq!(label, Label(idx));
1246+
}
12361247

1237-
let start_ptr = cb.get_write_ptr();
1238-
let gc_offsets = asm.x86_emit(cb).inspect_err(|_| cb.clear_labels())?;
1239-
assert!(!cb.has_dropped_bytes(), "emit should not drop bytes without error");
1248+
let start_ptr = cb.get_write_ptr();
1249+
let gc_offsets = asm.x86_emit(cb).inspect_err(|_| cb.clear_labels())?;
1250+
assert!(!cb.has_dropped_bytes(), "emit should not drop bytes without error");
12401251

1241-
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
1242-
Ok((start_ptr, gc_offsets))
1252+
cb.link_labels().or(Err(CompileError::LabelLinkingFailure))?;
1253+
Ok((start_ptr, gc_offsets))
1254+
})
12431255
}
12441256
}
12451257

0 commit comments

Comments
 (0)