Skip to content

Commit d7d2a74

Browse files
tekknolagi and claude committed
ZJIT: Add side-exit attribution to HIR profiling
Side-exit code is emitted after the main function body but within the same address range. Previously, all side-exit samples were attributed to the last HIR instruction (Return), inflating its share to ~32%. Add a synthetic "(side-exits)" line to the HIR source file and create a debug entry for the side-exit code region. Also fix the aggregation script to recognize side-exit lines and to treat addresses whose line has no HIR opcode as side-exit code. On lobsters, this reveals that 33% of JIT self-time is spent in side-exit code (saving frame state and returning to the interpreter). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 325fb5a commit d7d2a74

2 files changed

Lines changed: 29 additions & 2 deletions

File tree

tool/zjit_hir_aggregate.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,9 @@ def parse_hir_opcodes(path)
102102
# Extract opcode: "v42:Fixnum = FixnumAdd v28, v29" -> "FixnumAdd"
103103
# or "CheckInterrupts" -> "CheckInterrupts"
104104
# or "Return v33" -> "Return"
105-
if stripped =~ /=\s+(\w+)/
105+
if stripped =~ /^\(side-exits\)/
106+
lines[lineno] = "(side-exits)"
107+
elsif stripped =~ /=\s+(\w+)/
106108
lines[lineno] = $1
107109
elsif stripped =~ /^(\w+)/
108110
lines[lineno] = $1
@@ -237,7 +239,7 @@ def lookup_address(lookups, addr)
237239

238240
entry = lookup_address(lookups, abs_addr)
239241
if entry
240-
opcode = hir_opcodes[entry[:line]] || "unknown(line:#{entry[:line]})"
242+
opcode = hir_opcodes[entry[:line]] || "(side-exits)"
241243
opcode_counts[opcode] += 1
242244
func_opcode_counts[entry[:func_name]][opcode] += 1
243245
else

zjit/src/codegen.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,9 +222,11 @@ fn emit_jitdump_for_function(
222222
// the entry at the lower offset.
223223
let mut debug_entries: Vec<DebugEntry> = Vec::new();
224224
let mut seen_offsets: std::collections::HashSet<u64> = std::collections::HashSet::new();
225+
let mut max_addr: u64 = start_addr;
225226
for &(code_ptr, insn_id) in pos_markers {
226227
if let Some(&line) = insn_id_to_line.get(&insn_id) {
227228
let addr = code_ptr.raw_addr(cb) as u64;
229+
if addr > max_addr { max_addr = addr; }
228230
if seen_offsets.insert(addr) {
229231
debug_entries.push(DebugEntry {
230232
code_addr: addr,
@@ -235,6 +237,26 @@ fn emit_jitdump_for_function(
235237
}
236238
}
237239

240+
// The "(side-exits)" line in the HIR text is the last line.
241+
// Add a debug entry for it so side-exit code isn't attributed to Return.
242+
let side_exit_line = line_offset + hir_text.lines().count() as u32;
243+
// Find where side-exit code likely starts: scan forward from the last marker
244+
// to find the next instruction boundary (approximation: last marker + some offset)
245+
let end_addr = start_addr + code_size as u64;
246+
if max_addr + 32 < end_addr {
247+
// There's significant code after the last HIR instruction — likely side exits
248+
// Use the address 4 bytes past the estimated end of the last marker's code as a conservative start
249+
// (the Return instruction itself is ~20 bytes of code)
250+
let side_exit_start = max_addr + 24; // past Return's ~5 arm64 instructions
251+
if side_exit_start < end_addr {
252+
debug_entries.push(DebugEntry {
253+
code_addr: side_exit_start,
254+
line: side_exit_line,
255+
filename: hir_file_path,
256+
});
257+
}
258+
}
259+
238260
// Write DEBUG_INFO before CODE_LOAD — samply expects this ordering
239261
if let Err(e) = jitdump.write_debug_info(start_addr, &debug_entries) {
240262
debug!("Failed to write jitdump debug info: {e}");
@@ -314,6 +336,9 @@ fn format_hir_for_jitdump(function: &Function) -> (String, std::collections::Has
314336
}
315337
}
316338

339+
// Synthetic line for side-exit code attribution
340+
writeln!(text, " (side-exits)").unwrap();
341+
317342
(text, insn_id_to_line)
318343
}
319344

0 commit comments

Comments
 (0)