Skip to content

Commit 8a5fc14

Browse files
authored
ZJIT: Allow --zjit-perf to dump perf symbols for HIR (ruby#16501)
1 parent cec348c commit 8a5fc14

5 files changed

Lines changed: 90 additions & 19 deletions

File tree

misc/jit_perf.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ def categorize_symbol(dso, symbol):
2222
return '[sha256]'
2323
elif symbol.startswith('[JIT] gen_send'):
2424
return '[JIT send]'
25-
# TODO: Stop using zjit:: as the prefix for JIT code. Rust modules and JIT code should use different namespaces.
26-
elif symbol.startswith('[JIT]') or (symbol.startswith('zjit::') and '@') or symbol == 'zjit::ZJIT entry trampoline':
25+
elif symbol.startswith('[JIT]') or symbol.startswith('ZJIT: ') or dso.startswith('perf-'):
2726
return '[JIT code]'
2827
elif '::' in symbol or symbol.startswith('_ZN4yjit') or symbol.startswith('_ZN4zjit'):
2928
return '[JIT compile]'

zjit/src/backend/arm64/mod.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1734,6 +1734,13 @@ mod tests {
17341734
(asm, CodeBlock::new_dummy())
17351735
}
17361736

1737+
/// Test helper: prepares the ZJIT options, creates an `Assembler` together with its
/// scratch register, opens a block labelled "test" on it, and returns the assembler,
/// a dummy `CodeBlock`, and the scratch register operand.
fn setup_asm_with_scratch_reg() -> (Assembler, CodeBlock, Opnd) {
1738+
crate::options::rb_zjit_prepare_options(); // Allow `get_option!` in Assembler
1739+
let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg();
1740+
// Instructions are emitted into a block, so open one before handing the assembler out.
// NOTE(review): presumably required before any `asm.*` emission call — confirm.
asm.new_block_without_id("test");
1741+
(asm, CodeBlock::new_dummy(), scratch_reg)
1742+
}
1743+
17371744
#[test]
17381745
fn test_lir_string() {
17391746
use crate::hir::SideExitReason;
@@ -2159,9 +2166,7 @@ mod tests {
21592166

21602167
#[test]
21612168
fn test_store_with_valid_scratch_reg() {
2162-
let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg();
2163-
asm.new_block_without_id("test");
2164-
let mut cb = CodeBlock::new_dummy();
2169+
let (mut asm, mut cb, scratch_reg) = setup_asm_with_scratch_reg();
21652170
asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into());
21662171

21672172
asm.compile_with_num_regs(&mut cb, 0);

zjit/src/backend/lir.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ use std::panic;
55
use std::rc::Rc;
66
use std::sync::{Arc, Mutex};
77
use crate::bitset::BitSet;
8-
use crate::codegen::local_size_and_idx_to_ep_offset;
8+
use crate::codegen::{local_size_and_idx_to_ep_offset, perf_symbol_range_start, perf_symbol_range_end};
99
use crate::cruby::{Qundef, RUBY_OFFSET_CFP_PC, RUBY_OFFSET_CFP_SP, SIZEOF_VALUE_I32, vm_stack_canary};
1010
use crate::hir::{Invariant, SideExitReason};
1111
use crate::hir;
12-
use crate::options::{TraceExits, get_option};
12+
use crate::options::{TraceExits, PerfMap, get_option};
1313
use crate::cruby::VALUE;
1414
use crate::payload::IseqVersionRef;
1515
use crate::stats::{exit_counter_ptr, exit_counter_ptr_for_opcode, side_exit_counter, CompileError};
@@ -2675,6 +2675,13 @@ impl Assembler
26752675
// Map from SideExit to compiled Label. This table is used to deduplicate side exit code.
26762676
let mut compiled_exits: HashMap<SideExit, Label> = HashMap::new();
26772677

2678+
// Start a new perf range for side exits
2679+
let perf_symbol = if get_option!(perf) == Some(PerfMap::HIR) {
2680+
Some(perf_symbol_range_start(self, "side exit"))
2681+
} else {
2682+
None
2683+
};
2684+
26782685
// Mark the start of side-exit code so we can measure its size
26792686
if !targets.is_empty() {
26802687
self.pos_marker(move |start_pos, cb| {
@@ -2756,6 +2763,11 @@ impl Assembler
27562763
crate::stats::incr_counter_by(crate::stats::Counter::compile_side_exit_time_ns, nanos as u64);
27572764
}
27582765

2766+
// Close the current perf range for side exits
2767+
if let Some(perf_symbol) = &perf_symbol {
2768+
perf_symbol_range_end(self, perf_symbol);
2769+
}
2770+
27592771
// Extract exit instructions and restore the previous current block
27602772
let exit_insns = take(&mut self.basic_blocks[exit_block.0].insns);
27612773
self.set_current_block(saved_block);

zjit/src/codegen.rs

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NAT
2323
use crate::hir::{iseq_to_hir, BlockId, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType};
2424
use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason};
2525
use crate::hir_type::{types, Type};
26-
use crate::options::get_option;
26+
use crate::options::{get_option, PerfMap};
2727
use crate::cast::IntoUsize;
2828

2929
/// At the moment, we support recompiling each ISEQ only once.
@@ -211,16 +211,16 @@ pub fn gen_iseq_call(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result<(),
211211
}
212212

213213
/// Write an entry to the perf map in /tmp
///
/// Appends one line made of `start_ptr` and `code_size` (both formatted as `0x`-prefixed
/// hex) followed by the prefixed symbol name to `/tmp/perf-<pid>.map`, where `<pid>` is
/// the current process id. The file is created if it does not exist and is always opened
/// in append mode. This is best-effort: a failure to open or write the file is only
/// reported through `debug!` and the function returns without signalling an error.
214-
fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) {
214+
fn register_with_perf(symbol_name: String, start_ptr: usize, code_size: usize) {
215215
use std::io::Write;
216216
// One map file per process, keyed by pid.
let perf_map = format!("/tmp/perf-{}.map", std::process::id());
217217
let Ok(file) = std::fs::OpenOptions::new().create(true).append(true).open(&perf_map) else {
218218
debug!("Failed to open perf map file: {perf_map}");
219219
return;
220220
};
221221
// NOTE(review): the writer is never flushed explicitly; `BufWriter` flushes when dropped
// at the end of this function, but an error from that flush is silently discarded.
let mut file = std::io::BufWriter::new(file);
222-
let Ok(_) = writeln!(file, "{start_ptr:#x} {code_size:#x} zjit::{iseq_name}") else {
223-
debug!("Failed to write {iseq_name} to perf map file: {perf_map}");
222+
// After this change every symbol name is written with the `ZJIT: ` prefix.
let Ok(_) = writeln!(file, "{start_ptr:#x} {code_size:#x} ZJIT: {symbol_name}") else {
223+
debug!("Failed to write {symbol_name} to perf map file: {perf_map}");
224224
return;
225225
};
226226
}
@@ -244,11 +244,11 @@ pub fn gen_entry_trampoline(cb: &mut CodeBlock) -> Result<CodePtr, CompileError>
244244

245245
let (code_ptr, gc_offsets) = asm.compile(cb)?;
246246
assert!(gc_offsets.is_empty());
247-
if get_option!(perf) {
247+
if get_option!(perf).is_some() {
248248
let start_ptr = code_ptr.raw_addr(cb);
249249
let end_ptr = cb.get_write_ptr().raw_addr(cb);
250250
let code_size = end_ptr - start_ptr;
251-
register_with_perf("ZJIT entry trampoline".into(), start_ptr, code_size);
251+
register_with_perf("entry trampoline".into(), start_ptr, code_size);
252252
}
253253
Ok(code_ptr)
254254
}
@@ -448,7 +448,23 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func
448448
assert!(insn_idx == block.insns().len() - 1, "Jump must be the last instruction in HIR block");
449449
},
450450
_ => {
451-
if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) {
451+
// Start a new perf range for the HIR instruction. For now, we do this only for
452+
// non-terminator instructions because LIR blocks must end with a terminator instruction.
453+
let perf_symbol = if get_option!(perf) == Some(PerfMap::HIR) && !insn.is_terminator() {
454+
let insn_name = format!("{insn}").split_whitespace().next().unwrap().to_string();
455+
Some(perf_symbol_range_start(&mut asm, &insn_name))
456+
} else {
457+
None
458+
};
459+
460+
let result = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn);
461+
462+
// Close the current perf range for the HIR instruction.
463+
if let Some(perf_symbol) = &perf_symbol {
464+
perf_symbol_range_end(&mut asm, perf_symbol);
465+
}
466+
467+
if let Err(last_snapshot) = result {
452468
debug!("ZJIT: gen_function: Failed to compile insn: {insn_id} {insn}. Generating side-exit.");
453469
gen_incr_counter(&mut asm, exit_counter_for_unhandled_hir_insn(&insn));
454470
gen_side_exit(&mut jit, &mut asm, &SideExitReason::UnhandledHIRInsn(insn_id), &function.frame_state(last_snapshot));
@@ -472,7 +488,7 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func
472488
// Generate code if everything can be compiled
473489
let result = asm.compile(cb);
474490
if let Ok((start_ptr, _)) = result {
475-
if get_option!(perf) {
491+
if get_option!(perf) == Some(PerfMap::ISEQ) {
476492
let start_usize = start_ptr.raw_addr(cb);
477493
let end_usize = cb.get_write_ptr().raw_addr(cb);
478494
let code_size = end_usize - start_usize;
@@ -3248,6 +3264,34 @@ impl IseqCall {
32483264
}
32493265
}
32503266

3267+
/// Shared handle describing one perf symbol range. The inner `Option` is `None` until the
/// start marker callback stores `(start position, symbol name)` in it, and becomes `None`
/// again once the end marker callback takes the pair out to register the symbol.
type PerfSymbol = Rc<RefCell<Option<(CodePtr, String)>>>;
3268+
3269+
/// Mark the start of a perf symbol range via pos_marker.
3270+
/// Returns a handle to pass to perf_symbol_range_end.
///
/// The start position is not known when this function runs. It registers a `pos_marker`
/// callback on `asm`; when that callback is invoked it stores the position it receives,
/// together with a copy of `symbol_name`, into the returned handle.
///
/// The callback panics (via `assert!`) if the handle already holds an open range.
3271+
pub fn perf_symbol_range_start(asm: &mut Assembler, symbol_name: &str) -> PerfSymbol {
3272+
// Owned copy of the name so it can be moved into the callback below.
let symbol_name = symbol_name.to_string();
3273+
// Starts out empty; filled in by the callback.
let perf_symbol: PerfSymbol = Rc::new(RefCell::new(None));
3274+
// Second reference to the same cell: this one moves into the callback, the other is returned.
let current = perf_symbol.clone();
3275+
asm.pos_marker(move |start, _| {
3276+
let mut current = current.borrow_mut();
3277+
assert!(current.is_none(), "perf symbol range already open");
3278+
*current = Some((start, symbol_name.clone()));
3279+
});
3280+
perf_symbol
3281+
}
3282+
3283+
/// Mark the end of a perf symbol range via pos_marker.
///
/// Registers a `pos_marker` callback on `asm`. When invoked, the callback takes the
/// `(start, name)` pair out of `perf_symbol`, computes the code size as the difference
/// between the raw addresses of the end and start positions, and passes the name, start
/// address and size to `register_with_perf`. If the handle is empty at that point the
/// callback does nothing.
3284+
pub fn perf_symbol_range_end(asm: &mut Assembler, perf_symbol: &PerfSymbol) {
3285+
// Clone the shared handle so the `move` callback owns its own reference to the cell.
let current = perf_symbol.clone();
3286+
asm.pos_marker(move |end, cb| {
3287+
// `take()` leaves `None` behind, so the same range is registered at most once.
if let Some((start, name)) = current.borrow_mut().take() {
3288+
let start_addr = start.raw_addr(cb);
3289+
// NOTE(review): assumes the end position is not before the start position;
// otherwise this unsigned subtraction would underflow — confirm.
let code_size = end.raw_addr(cb) - start_addr;
3290+
register_with_perf(name, start_addr, code_size);
3291+
}
3292+
});
3293+
}
3294+
32513295
#[cfg(test)]
32523296
#[path = "codegen_tests.rs"]
32533297
mod tests;

zjit/src/options.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@ use crate::cruby::*;
66
use crate::stats::Counter;
77
use std::collections::HashSet;
88

9+
/// Type of symbols to dump into /tmp/perf-{pid}.map
///
/// Held in `Options::perf` as `Option<PerfMap>`, which defaults to `None`.
10+
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
11+
pub enum PerfMap {
12+
/// Dump one symbol per ISEQ
/// (selected by `--zjit-perf` with no value or with `=iseq`).
13+
ISEQ,
14+
/// Dump one symbol per HIR instruction
/// (selected by `--zjit-perf=hir`). In this mode side-exit code is registered under
/// a separate `side exit` symbol, and terminator instructions get no symbol of their own.
15+
HIR,
16+
}
17+
918
/// Default --zjit-num-profiles
1019
const DEFAULT_NUM_PROFILES: NumProfiles = 5;
1120
pub type NumProfiles = u16;
@@ -89,7 +98,7 @@ pub struct Options {
8998
pub trace_side_exits_sample_interval: usize,
9099

91100
/// Dump code map to /tmp for performance profilers.
92-
pub perf: bool,
101+
pub perf: Option<PerfMap>,
93102

94103
/// List of ISEQs that can be compiled, identified by their iseq_get_location()
95104
pub allowed_iseqs: Option<HashSet<String>>,
@@ -118,7 +127,7 @@ impl Default for Options {
118127
dump_disasm: None,
119128
trace_side_exits: None,
120129
trace_side_exits_sample_interval: 0,
121-
perf: false,
130+
perf: None,
122131
allowed_iseqs: None,
123132
log_compiled_iseqs: None,
124133
}
@@ -141,7 +150,8 @@ pub const ZJIT_OPTIONS: &[(&str, &str)] = &[
141150
"Collect ZJIT stats (=file to write to a file)."),
142151
("--zjit-disable",
143152
"Disable ZJIT for lazily enabling it with RubyVM::ZJIT.enable."),
144-
("--zjit-perf", "Dump ISEQ symbols into /tmp/perf-{}.map for Linux perf."),
153+
("--zjit-perf[=iseq|hir]",
154+
"Dump symbols for Linux perf /tmp/perf-{}.map (default: iseq)."),
145155
("--zjit-log-compiled-iseqs=path",
146156
"Log compiled ISEQs to the file. The file will be truncated."),
147157
("--zjit-trace-exits[=counter]",
@@ -452,7 +462,8 @@ fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
452462
}
453463
}
454464

455-
("perf", "") => options.perf = true,
465+
("perf", "" | "iseq") => options.perf = Some(PerfMap::ISEQ),
466+
("perf", "hir") => options.perf = Some(PerfMap::HIR),
456467

457468
("allowed-iseqs", _) if !opt_val.is_empty() => options.allowed_iseqs = Some(parse_jit_list(opt_val)),
458469
("log-compiled-iseqs", _) if !opt_val.is_empty() => {

0 commit comments

Comments
 (0)