Skip to content

Commit 7548a8a

Browse files
committed
ZJIT: Deferral mechanism for cold-path re-profiling
Add escalating deferral for recompilations where the HIR still has unresolved NoProfile sends (>25% ratio) or non-monomorphic ivars. When triggered, the ISEQ returns to the interpreter for 1K calls to collect fresh profile data before V2 compilation.

- deferred_threshold() with escalating levels (call_threshold/1K/100K)
- Pre-HIR gate in gen_iseq_entry_point and function_stub_hit
- Post-HIR quality check triggers deferral on recompilations
- count_no_profile_sends/has_not_monomorphic_ivars on Function
- defer_count/deferred_stub_hits fields on IseqPayload
- trigger_recompilation resets deferral state
1 parent e03bec1 commit 7548a8a

3 files changed

Lines changed: 147 additions & 1 deletion

File tree

zjit/src/codegen.rs

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use crate::backend::lir::{self, Assembler, C_ARG_OPNDS, C_RET_OPND, CFP, EC, NAT
2424
use crate::hir::{iseq_to_hir, BlockId, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType};
2525
use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason};
2626
use crate::hir_type::{types, Type};
27-
use crate::options::get_option;
27+
use crate::options::{get_option, rb_zjit_call_threshold};
2828
use crate::cast::IntoUsize;
2929

3030
/// At the moment, we support recompiling each ISEQ only once.
@@ -52,6 +52,16 @@ pub extern "C" fn rb_zjit_count_side_exit(payload_raw: *mut std::ffi::c_void) {
5252
static GLOBAL_RECOMPILE_COUNT: AtomicU64 = AtomicU64::new(0);
5353
const MAX_GLOBAL_RECOMPILATIONS: u64 = 50;
5454

55+
/// Escalating threshold for deferred re-profiling. Higher deferral levels
56+
/// give cold branches progressively more time to warm up.
57+
fn deferred_threshold(defer_count: u32) -> u32 {
58+
match defer_count {
59+
1 => unsafe { rb_zjit_call_threshold as u32 },
60+
2 => 1_000,
61+
_ => 100_000,
62+
}
63+
}
64+
5565
fn trigger_recompilation(payload_raw: *mut std::ffi::c_void, iseq: IseqPtr) {
5666
if MAX_GLOBAL_RECOMPILATIONS > 0 {
5767
let prev = GLOBAL_RECOMPILE_COUNT.fetch_add(1, Ordering::Relaxed);
@@ -64,6 +74,12 @@ fn trigger_recompilation(payload_raw: *mut std::ffi::c_void, iseq: IseqPtr) {
6474
debug!("trigger_recompilation: recompiling {}", iseq_get_location(iseq, 0));
6575
incr_counter!(recompile_count);
6676
payload.profile.reset_for_recompile();
77+
78+
// Reset deferral state so V2 compilation goes straight to building the HIR.
79+
// If the HIR still has unresolved issues, the post-HIR deferral trigger handles escalation.
80+
payload.defer_count = 0;
81+
payload.deferred_stub_hits = 0;
82+
6783
if let Some(version) = payload.versions.last_mut() {
6884
let version = unsafe { version.as_mut() };
6985
version.status = IseqStatus::Invalidated;
@@ -232,11 +248,56 @@ fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr, jit_exception: bool)
232248
return Err(CompileError::ExceptionHandler);
233249
}
234250

251+
// If this ISEQ is in a deferred re-profiling window, don't compile yet.
252+
// Count this interpreter entry toward the threshold and keep the ISEQ
253+
// running in the interpreter with profiling active. Both interpreter
254+
// entries and stub fallbacks count toward the same escalating threshold.
255+
{
256+
let payload = get_or_create_iseq_payload(iseq);
257+
if payload.defer_count > 0 {
258+
let threshold = deferred_threshold(payload.defer_count);
259+
if payload.deferred_stub_hits < threshold {
260+
let call_threshold = unsafe { rb_zjit_call_threshold as u32 };
261+
payload.deferred_stub_hits += call_threshold;
262+
unsafe { rb_iseq_reset_jit_func(iseq) };
263+
return Err(CompileError::DeferredForReprofiling);
264+
}
265+
}
266+
}
267+
235268
// Compile ISEQ into High-level IR
236269
let function = crate::stats::with_time_stat(Counter::compile_hir_time_ns, || compile_iseq(iseq).inspect_err(|_| {
237270
incr_counter!(failed_iseq_count);
238271
}))?;
239272

273+
// Adaptive deferral for recompilations. First compilations never defer.
274+
// For recompilations (latest version invalidated), if the HIR has a
275+
// significant fraction of unresolved sends or any unresolved ivars,
276+
// defer for 1K interpreter calls to exercise cold branches.
277+
// A single dead-branch NoProfile send does NOT trigger deferral —
278+
// ISEQs where most sends are well-profiled compile immediately.
279+
if get_option!(recompile_threshold) > 0 {
280+
let payload = get_or_create_iseq_payload(iseq);
281+
let is_recompile = payload
282+
.versions
283+
.last()
284+
.map(|v| unsafe { v.as_ref() }.status == IseqStatus::Invalidated)
285+
.unwrap_or(false);
286+
// Use ratio-based check for sends: only defer if >25% of sends lack profiles.
287+
let (no_profile_sends, total_sends) = function.count_no_profile_sends();
288+
let sends_need_deferral = total_sends > 0 && no_profile_sends * 4 > total_sends;
289+
let has_unresolved = sends_need_deferral || function.has_not_monomorphic_ivars();
290+
if is_recompile && payload.defer_count < 2 && has_unresolved {
291+
payload.defer_count = 2; // level 2: deferred_threshold(2) = 1K calls
292+
payload.deferred_stub_hits = 0;
293+
payload.profile.reset_for_recompile();
294+
unsafe { rb_zjit_profile_enable(iseq) };
295+
unsafe { rb_iseq_reset_jit_func(iseq) };
296+
incr_counter!(recompile_count);
297+
return Err(CompileError::DeferredForReprofiling);
298+
}
299+
}
300+
240301
// Compile the High-level IR
241302
let IseqCodePtrs { start_ptr, .. } = gen_iseq(cb, iseq, Some(&function)).inspect_err(|err| {
242303
debug!("{err:?}: gen_iseq failed: {}", iseq_get_location(iseq, 0));
@@ -2881,6 +2942,35 @@ c_callable! {
28812942
// code path can be made read-only. But you still need the check as is while holding the VM lock in any case.
28822943
let cb = ZJITState::get_code_block();
28832944
let payload = get_or_create_iseq_payload(iseq);
2945+
2946+
// If this ISEQ is being re-profiled after deferral, fall back to
2947+
// the interpreter — the zjit_* profiling instructions are active
2948+
// and collect type data on each fallback. The threshold escalates
2949+
// with each deferral level to give cold branches progressively more
2950+
// time to warm up. This gate fires for both first-compilation deferrals
2951+
// (versions empty) and inline-triggered recompilation deferrals
2952+
// (latest version invalidated).
2953+
let latest_invalidated = payload.versions.last()
2954+
.map(|v| unsafe { v.as_ref() }.status == IseqStatus::Invalidated)
2955+
.unwrap_or(false);
2956+
if payload.defer_count > 0 && (payload.versions.is_empty() || latest_invalidated) {
2957+
// Count stub hits toward the deferral threshold for BOTH initial
2958+
// deferrals (versions empty) and recompilation deferrals (latest
2959+
// invalidated). Previously, recompilation deferrals returned the
2960+
// exit trampoline unconditionally without counting, causing the
2961+
// method to stay in the interpreter indefinitely — a catastrophic
2962+
// overhead for hot methods (addressable-merge lost 2.5s).
2963+
let threshold = deferred_threshold(payload.defer_count);
2964+
payload.deferred_stub_hits += 1;
2965+
if payload.deferred_stub_hits <= threshold {
2966+
// Still collecting profile data — fall back to interpreter
2967+
unsafe { Rc::increment_strong_count(iseq_call_ptr as *const IseqCall); }
2968+
prepare_for_exit(iseq, cfp, sp, &CompileError::DeferredForReprofiling);
2969+
return ZJITState::get_exit_trampoline().raw_ptr(cb);
2970+
}
2971+
// Enough profile data collected — fall through to compile
2972+
}
2973+
28842974
let last_status = payload.versions.last().map(|version| &unsafe { version.as_ref() }.status);
28852975
let compile_error = match last_status {
28862976
Some(IseqStatus::CantCompile(err)) => Some(err),

zjit/src/hir.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5679,6 +5679,55 @@ impl Function {
56795679
}
56805680
}
56815681

5682+
/// Count how many Send instructions have empty (NoProfile) type profiles.
5683+
/// Returns (no_profile_count, total_send_count).
5684+
pub fn count_no_profile_sends(&self) -> (usize, usize) {
5685+
let Some(profiles) = self.profiles.as_ref() else {
5686+
return (0, 0);
5687+
};
5688+
let mut no_profile = 0;
5689+
let mut total = 0;
5690+
for block in self.rpo() {
5691+
for &insn_id in &self.blocks[block.0].insns {
5692+
if let Insn::Send { state, .. } = self.find(insn_id) {
5693+
total += 1;
5694+
let frame_state = self.frame_state(state);
5695+
let insn_idx = frame_state.insn_idx;
5696+
// Check if the profile entry at this instruction is empty
5697+
if let Some(entries) = profiles.types.get(&insn_idx) {
5698+
if entries.is_empty() {
5699+
no_profile += 1;
5700+
}
5701+
}
5702+
}
5703+
}
5704+
}
5705+
(no_profile, total)
5706+
}
5707+
5708+
/// Check if any Send instructions have empty profiles.
5709+
/// Convenience wrapper around count_no_profile_sends().
5710+
pub fn has_no_profile_sends(&self) -> bool {
5711+
self.count_no_profile_sends().0 > 0
5712+
}
5713+
5714+
/// Check if the optimized HIR has any GetIvar/SetIvar instructions that
5715+
/// weren't optimized by optimize_getivar (i.e., they use the fallback
5716+
/// rb_vm_getinstancevariable/rb_vm_setinstancevariable path). These
5717+
/// survive in the HIR when the profiled self type was not monomorphic
5718+
/// or had no profile data at all.
5719+
pub fn has_not_monomorphic_ivars(&self) -> bool {
5720+
for block in self.rpo() {
5721+
for &insn_id in &self.blocks[block.0].insns {
5722+
match self.find(insn_id) {
5723+
Insn::GetIvar { .. } | Insn::SetIvar { .. } => return true,
5724+
_ => {}
5725+
}
5726+
}
5727+
}
5728+
false
5729+
}
5730+
56825731
/// Dump HIR passed to codegen if specified by options.
56835732
pub fn dump_hir(&self) {
56845733
// Dump HIR after optimization

zjit/src/payload.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ pub struct IseqPayload {
1313
pub versions: Vec<IseqVersionRef>,
1414
/// Number of side exits observed for this ISEQ
1515
pub side_exit_count: u64,
16+
/// How many times this ISEQ has been deferred for re-profiling.
17+
/// 0 = not deferred, 1 = first deferral (call_threshold calls), 2 = second (1K calls), 3 or more = 100K calls.
18+
pub defer_count: u32,
19+
/// How many stub/entry hits have been counted during the current deferral window.
20+
pub deferred_stub_hits: u32,
1621
}
1722

1823
impl IseqPayload {
@@ -21,6 +26,8 @@ impl IseqPayload {
2126
profile: IseqProfile::new(iseq_size),
2227
versions: vec![],
2328
side_exit_count: 0,
29+
defer_count: 0,
30+
deferred_stub_hits: 0,
2431
}
2532
}
2633
}

0 commit comments

Comments
 (0)