Skip to content

Commit 83da22d

Browse files
committed
ZJIT: Inline type feedback for NoProfile sends
Add runtime inline profiling for sends that lacked interpreter profile data. JIT code emits guarded ccalls to rb_zjit_inline_profile_send which records receiver types into the profiling data structure. After enough observations, triggers recompilation with a quality gate (>=50% monomorphic/skewed) so V2 can specialize the sends. - rb_zjit_inline_profile_send with self-disabling counter and quality gate - rb_zjit_count_ivar_fallback for not_monomorphic ivar recompilation - gen_guarded_inline_profile with assembly-level skip guard - trigger_recompilation gains preserve_profiles parameter - reset_counters_for_recompile preserves type distributions - Inline profiling in gen_send/gen_send_without_block/gen_getivar/gen_setivar - has_inline_feedback skips deferral for inline-triggered recompilations
1 parent 7548a8a commit 83da22d

4 files changed

Lines changed: 245 additions & 12 deletions

File tree

zjit/src/codegen.rs

Lines changed: 165 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use crate::hir::{iseq_to_hir, BlockId, Invariant, RangeType, SideExitReason::{se
2525
use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason};
2626
use crate::hir_type::{types, Type};
2727
use crate::options::{get_option, rb_zjit_call_threshold};
28+
use crate::profile::ProfiledType;
2829
use crate::cast::IntoUsize;
2930

3031
/// At the moment, we support recompiling each ISEQ only once.
@@ -44,7 +45,7 @@ pub extern "C" fn rb_zjit_count_side_exit(payload_raw: *mut std::ffi::c_void) {
4445
None => return,
4546
};
4647
with_vm_lock(src_loc!(), || {
47-
trigger_recompilation(payload_raw, iseq);
48+
trigger_recompilation(payload_raw, iseq, true);
4849
});
4950
}
5051
}
@@ -62,7 +63,9 @@ fn deferred_threshold(defer_count: u32) -> u32 {
6263
}
6364
}
6465

65-
fn trigger_recompilation(payload_raw: *mut std::ffi::c_void, iseq: IseqPtr) {
66+
/// When `preserve_profiles` is true, only counters are reset (type distributions survive).
67+
/// When false, both counters and type distributions are cleared.
68+
fn trigger_recompilation(payload_raw: *mut std::ffi::c_void, iseq: IseqPtr, preserve_profiles: bool) {
6669
if MAX_GLOBAL_RECOMPILATIONS > 0 {
6770
let prev = GLOBAL_RECOMPILE_COUNT.fetch_add(1, Ordering::Relaxed);
6871
if prev >= MAX_GLOBAL_RECOMPILATIONS {
@@ -71,9 +74,13 @@ fn trigger_recompilation(payload_raw: *mut std::ffi::c_void, iseq: IseqPtr) {
7174
}
7275
}
7376
let payload = unsafe { &mut *(payload_raw as *mut IseqPayload) };
74-
debug!("trigger_recompilation: recompiling {}", iseq_get_location(iseq, 0));
77+
debug!("trigger_recompilation: recompiling {} (preserve_profiles={})", iseq_get_location(iseq, 0), preserve_profiles);
7578
incr_counter!(recompile_count);
76-
payload.profile.reset_for_recompile();
79+
if preserve_profiles {
80+
payload.profile.reset_counters_for_recompile();
81+
} else {
82+
payload.profile.reset_for_recompile();
83+
}
7784

7885
// Reset deferral state so V2 compilation goes straight to building the HIR.
7986
// If the HIR still has unresolved issues, the post-HIR deferral trigger handles escalation.
@@ -88,6 +95,76 @@ fn trigger_recompilation(payload_raw: *mut std::ffi::c_void, iseq: IseqPtr) {
8895
unsafe { rb_zjit_profile_enable(iseq) };
8996
}
9097

98+
/// Runtime helper called from JIT code to collect inline type feedback for NoProfile sends.
99+
/// When a NoProfile send executes, this records the receiver's class into the profiling data
100+
/// structure. After enough observations, triggers recompilation so the previously-NoProfile
101+
/// sends compile to direct calls using the collected type data.
102+
#[unsafe(no_mangle)]
103+
pub extern "C" fn rb_zjit_inline_profile_send(
104+
payload_raw: *mut std::ffi::c_void,
105+
insn_idx: u64,
106+
recv: VALUE,
107+
n_operands: u64,
108+
) {
109+
if payload_raw.is_null() { return; }
110+
let payload = unsafe { &mut *(payload_raw as *mut IseqPayload) };
111+
let insn_idx = insn_idx as usize;
112+
113+
let threshold = (get_option!(recompile_threshold) as u64) / 2;
114+
if threshold == 0 || payload.no_profile_send_hits >= threshold { return; }
115+
116+
payload.no_profile_send_hits += 1;
117+
118+
if payload.no_profile_send_hits == threshold && payload.versions.len() < MAX_ISEQ_VERSIONS {
119+
if !payload.profile.inline_feedback_is_high_quality() {
120+
return;
121+
}
122+
payload.has_inline_feedback = true;
123+
let iseq = match payload.versions.last() {
124+
Some(version_ref) => unsafe { version_ref.as_ref() }.iseq,
125+
None => return,
126+
};
127+
with_vm_lock(src_loc!(), || {
128+
trigger_recompilation(payload_raw, iseq, true);
129+
});
130+
return;
131+
}
132+
133+
const INLINE_PROFILE_LIMIT: u32 = 5;
134+
if payload.profile.num_profiles_for(insn_idx) >= INLINE_PROFILE_LIMIT { return; }
135+
136+
let ty = ProfiledType::new(recv);
137+
if let Some(version_ref) = payload.versions.last() {
138+
let iseq = unsafe { version_ref.as_ref() }.iseq;
139+
VALUE::from(iseq).write_barrier(ty.class());
140+
}
141+
payload.profile.observe_receiver(insn_idx, n_operands as usize, ty);
142+
payload.profile.increment_num_profiles(insn_idx);
143+
}
144+
145+
/// Lightweight runtime helper for not_monomorphic ivar fallbacks.
146+
/// Only increments the recompilation trigger counter — no type recording.
147+
#[unsafe(no_mangle)]
148+
pub extern "C" fn rb_zjit_count_ivar_fallback(payload_raw: *mut std::ffi::c_void) {
149+
if payload_raw.is_null() { return; }
150+
let payload = unsafe { &mut *(payload_raw as *mut IseqPayload) };
151+
152+
let threshold = get_option!(recompile_threshold) as u64;
153+
if threshold == 0 || payload.no_profile_send_hits >= threshold { return; }
154+
155+
payload.no_profile_send_hits += 1;
156+
157+
if payload.no_profile_send_hits == threshold && payload.versions.len() < MAX_ISEQ_VERSIONS {
158+
let iseq = match payload.versions.last() {
159+
Some(version_ref) => unsafe { version_ref.as_ref() }.iseq,
160+
None => return,
161+
};
162+
with_vm_lock(src_loc!(), || {
163+
trigger_recompilation(payload_raw, iseq, true);
164+
});
165+
}
166+
}
167+
91168
unsafe extern "C" {
92169
fn rb_zjit_profile_enable(iseq: IseqPtr);
93170
}
@@ -120,6 +197,11 @@ struct JITState {
120197
iseq_calls: Vec<IseqCallRef>,
121198
payload_ptr: usize,
122199
has_version_budget: bool,
200+
201+
/// Whether inline profiling calls should be emitted for NoProfile sends.
202+
/// False when the ISEQ has too few NoProfile sends to justify the overhead.
203+
/// Set during gen_function based on the HIR's NoProfile send count.
204+
should_emit_inline_profiling: bool,
123205
}
124206

125207
impl JITState {
@@ -136,6 +218,7 @@ impl JITState {
136218
iseq_calls: Vec::default(),
137219
payload_ptr,
138220
has_version_budget,
221+
should_emit_inline_profiling: false, // Set by gen_function after HIR analysis
139222
}
140223
}
141224

@@ -287,10 +370,13 @@ fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr, jit_exception: bool)
287370
let (no_profile_sends, total_sends) = function.count_no_profile_sends();
288371
let sends_need_deferral = total_sends > 0 && no_profile_sends * 4 > total_sends;
289372
let has_unresolved = sends_need_deferral || function.has_not_monomorphic_ivars();
290-
if is_recompile && payload.defer_count < 2 && has_unresolved {
373+
let skip_deferral = payload.has_inline_feedback;
374+
if is_recompile && payload.defer_count < 2 && has_unresolved && !skip_deferral {
291375
payload.defer_count = 2; // level 2: deferred_threshold(2) = 1K calls
292376
payload.deferred_stub_hits = 0;
293-
payload.profile.reset_for_recompile();
377+
// Preserve inline feedback — only reset counters so the interpreter
378+
// adds observations on top during the 1K-call deferral window.
379+
payload.profile.reset_counters_for_recompile();
294380
unsafe { rb_zjit_profile_enable(iseq) };
295381
unsafe { rb_iseq_reset_jit_func(iseq) };
296382
incr_counter!(recompile_count);
@@ -436,6 +522,14 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func
436522
asm.payload_ptr = Some(jit.payload_ptr);
437523
}
438524

525+
// Enable inline profiling for ISEQs with enough NoProfile sends to justify
526+
// the overhead. ISEQs with <3 NoProfile sends don't benefit from inline
527+
// profiling — the side-exit path is sufficient for recompilation.
528+
if get_option!(recompile_threshold) > 0 && jit.has_version_budget && jit.payload_ptr != 0 {
529+
let (no_profile, _total) = function.count_no_profile_sends();
530+
jit.should_emit_inline_profiling = no_profile >= 3;
531+
}
532+
439533
// Mapping from HIR block IDs to LIR block IDs.
440534
// This is a one-to-one mapping from HIR to LIR blocks used for finding
441535
// jump targets in LIR (LIR should always jump to the head of an HIR block)
@@ -678,8 +772,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
678772
Insn::Param => unreachable!("block.insns should not have Insn::Param"),
679773
Insn::LoadArg { .. } => return Ok(()), // compiled in the LoadArg pre-pass above
680774
Insn::Snapshot { .. } => return Ok(()), // we don't need to do anything for this instruction at the moment
681-
&Insn::Send { cd, blockiseq: None, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason),
682-
&Insn::Send { cd, blockiseq: Some(blockiseq), state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
775+
&Insn::Send { cd, blockiseq: None, recv, state, reason, .. } => gen_send_without_block(jit, asm, cd, recv, &function.frame_state(state), reason),
776+
&Insn::Send { cd, blockiseq: Some(blockiseq), recv, state, reason, .. } => gen_send(jit, asm, cd, recv, blockiseq, &function.frame_state(state), reason),
683777
&Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
684778
Insn::SendDirect { cme, iseq, recv, args, kw_bits, blockiseq, state, .. } => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), *kw_bits, &function.frame_state(*state), *blockiseq),
685779
&Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason),
@@ -740,8 +834,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
740834
Insn::CCall { cfunc, recv, args, name, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)),
741835
// Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args).
742836
// There's no test case for this because no core cfuncs have this many parameters. But C extensions could have such methods.
743-
Insn::CCallWithFrame { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() =>
744-
gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs),
837+
Insn::CCallWithFrame { cd, recv, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() =>
838+
gen_send_without_block(jit, asm, *cd, *recv, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs),
745839
Insn::CCallWithFrame { cfunc, recv, name, args, cme, state, blockiseq, .. } =>
746840
gen_ccall_with_frame(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, *blockiseq, &function.frame_state(*state)),
747841
Insn::CCallVariadic { cfunc, recv, name, args, cme, state, blockiseq, return_type: _, elidable: _ } => {
@@ -1225,6 +1319,12 @@ fn gen_ccall_variadic(
12251319

12261320
/// Emit an uncached instance variable lookup
12271321
fn gen_getivar(jit: &mut JITState, asm: &mut Assembler, recv: Opnd, id: ID, ic: *const iseq_inline_iv_cache_entry) -> Opnd {
1322+
// Count not_monomorphic ivar fallback executions for the recompilation trigger.
1323+
if jit.payload_ptr != 0 && jit.has_version_budget {
1324+
asm_comment!(asm, "count not_monomorphic getivar for recompilation");
1325+
asm_ccall!(asm, rb_zjit_count_ivar_fallback, Opnd::UImm(jit.payload_ptr as u64));
1326+
}
1327+
12281328
if ic.is_null() {
12291329
asm_ccall!(asm, rb_ivar_get, recv, id.0.into())
12301330
} else {
@@ -1235,6 +1335,12 @@ fn gen_getivar(jit: &mut JITState, asm: &mut Assembler, recv: Opnd, id: ID, ic:
12351335

12361336
/// Emit an uncached instance variable store
12371337
fn gen_setivar(jit: &mut JITState, asm: &mut Assembler, recv: Opnd, id: ID, ic: *const iseq_inline_iv_cache_entry, val: Opnd, state: &FrameState) {
1338+
// Count not_monomorphic ivar fallback executions for the recompilation trigger.
1339+
if jit.payload_ptr != 0 && jit.has_version_budget {
1340+
asm_comment!(asm, "count not_monomorphic setivar for recompilation");
1341+
asm_ccall!(asm, rb_zjit_count_ivar_fallback, Opnd::UImm(jit.payload_ptr as u64));
1342+
}
1343+
12381344
// Setting an ivar can raise FrozenError, so we need proper frame state for exception handling.
12391345
gen_prepare_non_leaf_call(jit, asm, state);
12401346
if ic.is_null() {
@@ -1480,16 +1586,57 @@ fn gen_if_false(asm: &mut Assembler, val: lir::Opnd, branch: lir::BranchEdge, fa
14801586
}
14811587

14821588
/// Compile a dynamic dispatch with block
1589+
/// Emit inline type feedback with an assembly-level guard that skips the ccall
1590+
/// when profiling is self-disabled (no_profile_send_hits >= threshold).
1591+
fn gen_guarded_inline_profile(
1592+
jit: &mut JITState,
1593+
asm: &mut Assembler,
1594+
recv_opnd: Opnd,
1595+
insn_idx: u64,
1596+
n_operands: u64,
1597+
) {
1598+
let threshold = (get_option!(recompile_threshold) as u64) / 2;
1599+
if threshold == 0 { return; }
1600+
1601+
let hir_block_id = asm.current_block().hir_block_id;
1602+
let rpo_idx = asm.current_block().rpo_index;
1603+
let skip_block = asm.new_block(hir_block_id, false, rpo_idx);
1604+
let skip_edge = || Target::Block(lir::BranchEdge { target: skip_block, args: vec![] });
1605+
1606+
asm_comment!(asm, "guard: skip inline profiling if self-disabled");
1607+
let payload_addr = asm.load(Opnd::UImm(jit.payload_ptr as u64));
1608+
let offset = std::mem::offset_of!(crate::payload::IseqPayload, no_profile_send_hits) as i32;
1609+
let hits = asm.load(Opnd::mem(64, payload_addr, offset));
1610+
asm.cmp(hits, Opnd::UImm(threshold));
1611+
asm.jge(jit, skip_edge());
1612+
1613+
asm_comment!(asm, "inline type feedback for NoProfile send");
1614+
asm_ccall!(asm, rb_zjit_inline_profile_send, Opnd::UImm(jit.payload_ptr as u64), Opnd::UImm(insn_idx), recv_opnd, Opnd::UImm(n_operands));
1615+
1616+
asm.jmp(skip_edge());
1617+
asm.set_current_block(skip_block);
1618+
let label = jit.get_label(asm, skip_block, hir_block_id);
1619+
asm.write_label(label);
1620+
}
1621+
14831622
fn gen_send(
14841623
jit: &mut JITState,
14851624
asm: &mut Assembler,
14861625
cd: *const rb_call_data,
1626+
recv: InsnId,
14871627
blockiseq: IseqPtr,
14881628
state: &FrameState,
14891629
reason: SendFallbackReason,
14901630
) -> lir::Opnd {
14911631
gen_incr_send_fallback_counter(asm, reason);
14921632

1633+
// Inline type feedback for NoProfile sends
1634+
if matches!(reason, SendFallbackReason::SendNoProfiles) && jit.should_emit_inline_profiling {
1635+
let recv_opnd = jit.get_opnd(recv);
1636+
let n_operands = unsafe { vm_ci_argc((*cd).ci) } as u64 + 1;
1637+
gen_guarded_inline_profile(jit, asm, recv_opnd, state.insn_idx as u64, n_operands);
1638+
}
1639+
14931640
gen_prepare_non_leaf_call(jit, asm, state);
14941641
asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd));
14951642
unsafe extern "C" {
@@ -1531,11 +1678,19 @@ fn gen_send_without_block(
15311678
jit: &mut JITState,
15321679
asm: &mut Assembler,
15331680
cd: *const rb_call_data,
1681+
recv: InsnId,
15341682
state: &FrameState,
15351683
reason: SendFallbackReason,
15361684
) -> lir::Opnd {
15371685
gen_incr_send_fallback_counter(asm, reason);
15381686

1687+
// Inline type feedback for NoProfile sends
1688+
if matches!(reason, SendFallbackReason::SendWithoutBlockNoProfiles) && jit.should_emit_inline_profiling {
1689+
let recv_opnd = jit.get_opnd(recv);
1690+
let n_operands = unsafe { vm_ci_argc((*cd).ci) } as u64 + 1;
1691+
gen_guarded_inline_profile(jit, asm, recv_opnd, state.insn_idx as u64, n_operands);
1692+
}
1693+
15391694
gen_prepare_non_leaf_call(jit, asm, state);
15401695
asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd));
15411696
unsafe extern "C" {

zjit/src/hir.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6382,7 +6382,8 @@ impl<'a> std::fmt::Display for FunctionGraphvizPrinter<'a> {
63826382
#[derive(Debug, Clone, PartialEq)]
63836383
pub struct FrameState {
63846384
iseq: IseqPtr,
6385-
insn_idx: usize,
6385+
/// YARV instruction index within the ISEQ
6386+
pub insn_idx: usize,
63866387
// Ruby bytecode instruction pointer
63876388
pub pc: *const VALUE,
63886389

zjit/src/payload.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ pub struct IseqPayload {
1818
pub defer_count: u32,
1919
/// How many stub/entry hits have been counted during the current deferral window.
2020
pub deferred_stub_hits: u32,
21+
/// Number of NoProfile send executions observed via inline feedback.
22+
/// Used as the recompilation trigger for inline-feedback-driven recompilation.
23+
pub no_profile_send_hits: u64,
24+
/// Whether this ISEQ's last recompilation was triggered by inline feedback.
25+
/// When true, the post-HIR deferral check in gen_iseq_entry_point is skipped
26+
/// because the preserved inline feedback already provides type data.
27+
pub has_inline_feedback: bool,
2128
}
2229

2330
impl IseqPayload {
@@ -28,6 +35,8 @@ impl IseqPayload {
2835
side_exit_count: 0,
2936
defer_count: 0,
3037
deferred_stub_hits: 0,
38+
no_profile_send_hits: 0,
39+
has_inline_feedback: false,
3140
}
3241
}
3342
}

0 commit comments

Comments (0)