@@ -25,6 +25,7 @@ use crate::hir::{iseq_to_hir, BlockId, Invariant, RangeType, SideExitReason::{se
2525use crate :: hir:: { Const , FrameState , Function , Insn , InsnId , SendFallbackReason } ;
2626use crate :: hir_type:: { types, Type } ;
2727use crate :: options:: { get_option, rb_zjit_call_threshold} ;
28+ use crate :: profile:: ProfiledType ;
2829use crate :: cast:: IntoUsize ;
2930
3031/// At the moment, we support recompiling each ISEQ only once.
@@ -44,7 +45,7 @@ pub extern "C" fn rb_zjit_count_side_exit(payload_raw: *mut std::ffi::c_void) {
4445 None => return ,
4546 } ;
4647 with_vm_lock ( src_loc ! ( ) , || {
47- trigger_recompilation ( payload_raw, iseq) ;
48+ trigger_recompilation ( payload_raw, iseq, true ) ;
4849 } ) ;
4950 }
5051}
@@ -62,7 +63,9 @@ fn deferred_threshold(defer_count: u32) -> u32 {
6263 }
6364}
6465
65- fn trigger_recompilation ( payload_raw : * mut std:: ffi:: c_void , iseq : IseqPtr ) {
66+ /// When `preserve_profiles` is true, only counters are reset (type distributions survive).
67+ /// When false, both counters and type distributions are cleared.
68+ fn trigger_recompilation ( payload_raw : * mut std:: ffi:: c_void , iseq : IseqPtr , preserve_profiles : bool ) {
6669 if MAX_GLOBAL_RECOMPILATIONS > 0 {
6770 let prev = GLOBAL_RECOMPILE_COUNT . fetch_add ( 1 , Ordering :: Relaxed ) ;
6871 if prev >= MAX_GLOBAL_RECOMPILATIONS {
@@ -71,9 +74,13 @@ fn trigger_recompilation(payload_raw: *mut std::ffi::c_void, iseq: IseqPtr) {
7174 }
7275 }
7376 let payload = unsafe { & mut * ( payload_raw as * mut IseqPayload ) } ;
74- debug ! ( "trigger_recompilation: recompiling {}" , iseq_get_location( iseq, 0 ) ) ;
77+ debug ! ( "trigger_recompilation: recompiling {} (preserve_profiles={}) " , iseq_get_location( iseq, 0 ) , preserve_profiles ) ;
7578 incr_counter ! ( recompile_count) ;
76- payload. profile . reset_for_recompile ( ) ;
79+ if preserve_profiles {
80+ payload. profile . reset_counters_for_recompile ( ) ;
81+ } else {
82+ payload. profile . reset_for_recompile ( ) ;
83+ }
7784
7885 // Reset deferral state so V2 compilation goes straight to building the HIR.
7986 // If the HIR still has unresolved issues, the post-HIR deferral trigger handles escalation.
@@ -88,6 +95,76 @@ fn trigger_recompilation(payload_raw: *mut std::ffi::c_void, iseq: IseqPtr) {
8895 unsafe { rb_zjit_profile_enable ( iseq) } ;
8996}
9097
98+ /// Runtime helper called from JIT code to collect inline type feedback for NoProfile sends.
99+ /// When a NoProfile send executes, this records the receiver's class into the profiling data
100+ /// structure. After enough observations, triggers recompilation so the previously-NoProfile
101+ /// sends compile to direct calls using the collected type data.
102+ #[ unsafe( no_mangle) ]
103+ pub extern "C" fn rb_zjit_inline_profile_send (
104+ payload_raw : * mut std:: ffi:: c_void ,
105+ insn_idx : u64 ,
106+ recv : VALUE ,
107+ n_operands : u64 ,
108+ ) {
109+ if payload_raw. is_null ( ) { return ; }
110+ let payload = unsafe { & mut * ( payload_raw as * mut IseqPayload ) } ;
111+ let insn_idx = insn_idx as usize ;
112+
113+ let threshold = ( get_option ! ( recompile_threshold) as u64 ) / 2 ;
114+ if threshold == 0 || payload. no_profile_send_hits >= threshold { return ; }
115+
116+ payload. no_profile_send_hits += 1 ;
117+
118+ if payload. no_profile_send_hits == threshold && payload. versions . len ( ) < MAX_ISEQ_VERSIONS {
119+ if !payload. profile . inline_feedback_is_high_quality ( ) {
120+ return ;
121+ }
122+ payload. has_inline_feedback = true ;
123+ let iseq = match payload. versions . last ( ) {
124+ Some ( version_ref) => unsafe { version_ref. as_ref ( ) } . iseq ,
125+ None => return ,
126+ } ;
127+ with_vm_lock ( src_loc ! ( ) , || {
128+ trigger_recompilation ( payload_raw, iseq, true ) ;
129+ } ) ;
130+ return ;
131+ }
132+
133+ const INLINE_PROFILE_LIMIT : u32 = 5 ;
134+ if payload. profile . num_profiles_for ( insn_idx) >= INLINE_PROFILE_LIMIT { return ; }
135+
136+ let ty = ProfiledType :: new ( recv) ;
137+ if let Some ( version_ref) = payload. versions . last ( ) {
138+ let iseq = unsafe { version_ref. as_ref ( ) } . iseq ;
139+ VALUE :: from ( iseq) . write_barrier ( ty. class ( ) ) ;
140+ }
141+ payload. profile . observe_receiver ( insn_idx, n_operands as usize , ty) ;
142+ payload. profile . increment_num_profiles ( insn_idx) ;
143+ }
144+
145+ /// Lightweight runtime helper for not_monomorphic ivar fallbacks.
146+ /// Only increments the recompilation trigger counter — no type recording.
147+ #[ unsafe( no_mangle) ]
148+ pub extern "C" fn rb_zjit_count_ivar_fallback ( payload_raw : * mut std:: ffi:: c_void ) {
149+ if payload_raw. is_null ( ) { return ; }
150+ let payload = unsafe { & mut * ( payload_raw as * mut IseqPayload ) } ;
151+
152+ let threshold = get_option ! ( recompile_threshold) as u64 ;
153+ if threshold == 0 || payload. no_profile_send_hits >= threshold { return ; }
154+
155+ payload. no_profile_send_hits += 1 ;
156+
157+ if payload. no_profile_send_hits == threshold && payload. versions . len ( ) < MAX_ISEQ_VERSIONS {
158+ let iseq = match payload. versions . last ( ) {
159+ Some ( version_ref) => unsafe { version_ref. as_ref ( ) } . iseq ,
160+ None => return ,
161+ } ;
162+ with_vm_lock ( src_loc ! ( ) , || {
163+ trigger_recompilation ( payload_raw, iseq, true ) ;
164+ } ) ;
165+ }
166+ }
167+
91168unsafe extern "C" {
92169 fn rb_zjit_profile_enable ( iseq : IseqPtr ) ;
93170}
@@ -120,6 +197,11 @@ struct JITState {
120197 iseq_calls : Vec < IseqCallRef > ,
121198 payload_ptr : usize ,
122199 has_version_budget : bool ,
200+
201+ /// Whether inline profiling calls should be emitted for NoProfile sends.
202+ /// False when the ISEQ has too few NoProfile sends to justify the overhead.
203+ /// Set during gen_function based on the HIR's NoProfile send count.
204+ should_emit_inline_profiling : bool ,
123205}
124206
125207impl JITState {
@@ -136,6 +218,7 @@ impl JITState {
136218 iseq_calls : Vec :: default ( ) ,
137219 payload_ptr,
138220 has_version_budget,
221+ should_emit_inline_profiling : false , // Set by gen_function after HIR analysis
139222 }
140223 }
141224
@@ -287,10 +370,13 @@ fn gen_iseq_entry_point(cb: &mut CodeBlock, iseq: IseqPtr, jit_exception: bool)
287370 let ( no_profile_sends, total_sends) = function. count_no_profile_sends ( ) ;
288371 let sends_need_deferral = total_sends > 0 && no_profile_sends * 4 > total_sends;
289372 let has_unresolved = sends_need_deferral || function. has_not_monomorphic_ivars ( ) ;
290- if is_recompile && payload. defer_count < 2 && has_unresolved {
373+ let skip_deferral = payload. has_inline_feedback ;
374+ if is_recompile && payload. defer_count < 2 && has_unresolved && !skip_deferral {
291375 payload. defer_count = 2 ; // level 2: deferred_threshold(2) = 1K calls
292376 payload. deferred_stub_hits = 0 ;
293- payload. profile . reset_for_recompile ( ) ;
377+ // Preserve inline feedback — only reset counters so the interpreter
378+ // adds observations on top during the 1K-call deferral window.
379+ payload. profile . reset_counters_for_recompile ( ) ;
294380 unsafe { rb_zjit_profile_enable ( iseq) } ;
295381 unsafe { rb_iseq_reset_jit_func ( iseq) } ;
296382 incr_counter ! ( recompile_count) ;
@@ -436,6 +522,14 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func
436522 asm. payload_ptr = Some ( jit. payload_ptr ) ;
437523 }
438524
525+ // Enable inline profiling for ISEQs with enough NoProfile sends to justify
526+ // the overhead. ISEQs with <3 NoProfile sends don't benefit from inline
527+ // profiling — the side-exit path is sufficient for recompilation.
528+ if get_option ! ( recompile_threshold) > 0 && jit. has_version_budget && jit. payload_ptr != 0 {
529+ let ( no_profile, _total) = function. count_no_profile_sends ( ) ;
530+ jit. should_emit_inline_profiling = no_profile >= 3 ;
531+ }
532+
439533 // Mapping from HIR block IDs to LIR block IDs.
440534 // This is a one-to-one mapping from HIR to LIR blocks used for finding
441535 // jump targets in LIR (LIR should always jump to the head of an HIR block)
@@ -678,8 +772,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
678772 Insn :: Param => unreachable ! ( "block.insns should not have Insn::Param" ) ,
679773 Insn :: LoadArg { .. } => return Ok ( ( ) ) , // compiled in the LoadArg pre-pass above
680774 Insn :: Snapshot { .. } => return Ok ( ( ) ) , // we don't need to do anything for this instruction at the moment
681- & Insn :: Send { cd, blockiseq : None , state, reason, .. } => gen_send_without_block ( jit, asm, cd, & function. frame_state ( state) , reason) ,
682- & Insn :: Send { cd, blockiseq : Some ( blockiseq) , state, reason, .. } => gen_send ( jit, asm, cd, blockiseq, & function. frame_state ( state) , reason) ,
775+ & Insn :: Send { cd, blockiseq : None , recv , state, reason, .. } => gen_send_without_block ( jit, asm, cd, recv , & function. frame_state ( state) , reason) ,
776+ & Insn :: Send { cd, blockiseq : Some ( blockiseq) , recv , state, reason, .. } => gen_send ( jit, asm, cd, recv , blockiseq, & function. frame_state ( state) , reason) ,
683777 & Insn :: SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward ( jit, asm, cd, blockiseq, & function. frame_state ( state) , reason) ,
684778 Insn :: SendDirect { cme, iseq, recv, args, kw_bits, blockiseq, state, .. } => gen_send_iseq_direct ( cb, jit, asm, * cme, * iseq, opnd ! ( recv) , opnds ! ( args) , * kw_bits, & function. frame_state ( * state) , * blockiseq) ,
685779 & Insn :: InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper ( jit, asm, cd, blockiseq, & function. frame_state ( state) , reason) ,
@@ -740,8 +834,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
740834 Insn :: CCall { cfunc, recv, args, name, return_type : _, elidable : _ } => gen_ccall ( asm, * cfunc, * name, opnd ! ( recv) , opnds ! ( args) ) ,
741835 // Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args).
742836 // There's no test case for this because no core cfuncs have this many parameters. But C extensions could have such methods.
743- Insn :: CCallWithFrame { cd, state, args, .. } if args. len ( ) + 1 > C_ARG_OPNDS . len ( ) =>
744- gen_send_without_block ( jit, asm, * cd, & function. frame_state ( * state) , SendFallbackReason :: CCallWithFrameTooManyArgs ) ,
837+ Insn :: CCallWithFrame { cd, recv , state, args, .. } if args. len ( ) + 1 > C_ARG_OPNDS . len ( ) =>
838+ gen_send_without_block ( jit, asm, * cd, * recv , & function. frame_state ( * state) , SendFallbackReason :: CCallWithFrameTooManyArgs ) ,
745839 Insn :: CCallWithFrame { cfunc, recv, name, args, cme, state, blockiseq, .. } =>
746840 gen_ccall_with_frame ( jit, asm, * cfunc, * name, opnd ! ( recv) , opnds ! ( args) , * cme, * blockiseq, & function. frame_state ( * state) ) ,
747841 Insn :: CCallVariadic { cfunc, recv, name, args, cme, state, blockiseq, return_type : _, elidable : _ } => {
@@ -1225,6 +1319,12 @@ fn gen_ccall_variadic(
12251319
12261320/// Emit an uncached instance variable lookup
12271321fn gen_getivar ( jit : & mut JITState , asm : & mut Assembler , recv : Opnd , id : ID , ic : * const iseq_inline_iv_cache_entry ) -> Opnd {
1322+ // Count not_monomorphic ivar fallback executions for the recompilation trigger.
1323+ if jit. payload_ptr != 0 && jit. has_version_budget {
1324+ asm_comment ! ( asm, "count not_monomorphic getivar for recompilation" ) ;
1325+ asm_ccall ! ( asm, rb_zjit_count_ivar_fallback, Opnd :: UImm ( jit. payload_ptr as u64 ) ) ;
1326+ }
1327+
12281328 if ic. is_null ( ) {
12291329 asm_ccall ! ( asm, rb_ivar_get, recv, id. 0 . into( ) )
12301330 } else {
@@ -1235,6 +1335,12 @@ fn gen_getivar(jit: &mut JITState, asm: &mut Assembler, recv: Opnd, id: ID, ic:
12351335
12361336/// Emit an uncached instance variable store
12371337fn gen_setivar ( jit : & mut JITState , asm : & mut Assembler , recv : Opnd , id : ID , ic : * const iseq_inline_iv_cache_entry , val : Opnd , state : & FrameState ) {
1338+ // Count not_monomorphic ivar fallback executions for the recompilation trigger.
1339+ if jit. payload_ptr != 0 && jit. has_version_budget {
1340+ asm_comment ! ( asm, "count not_monomorphic setivar for recompilation" ) ;
1341+ asm_ccall ! ( asm, rb_zjit_count_ivar_fallback, Opnd :: UImm ( jit. payload_ptr as u64 ) ) ;
1342+ }
1343+
12381344 // Setting an ivar can raise FrozenError, so we need proper frame state for exception handling.
12391345 gen_prepare_non_leaf_call ( jit, asm, state) ;
12401346 if ic. is_null ( ) {
@@ -1480,16 +1586,57 @@ fn gen_if_false(asm: &mut Assembler, val: lir::Opnd, branch: lir::BranchEdge, fa
14801586}
14811587
14821588/// Compile a dynamic dispatch with block
1589+ /// Emit inline type feedback with an assembly-level guard that skips the ccall
1590+ /// when profiling is self-disabled (no_profile_send_hits >= threshold).
1591+ fn gen_guarded_inline_profile (
1592+ jit : & mut JITState ,
1593+ asm : & mut Assembler ,
1594+ recv_opnd : Opnd ,
1595+ insn_idx : u64 ,
1596+ n_operands : u64 ,
1597+ ) {
1598+ let threshold = ( get_option ! ( recompile_threshold) as u64 ) / 2 ;
1599+ if threshold == 0 { return ; }
1600+
1601+ let hir_block_id = asm. current_block ( ) . hir_block_id ;
1602+ let rpo_idx = asm. current_block ( ) . rpo_index ;
1603+ let skip_block = asm. new_block ( hir_block_id, false , rpo_idx) ;
1604+ let skip_edge = || Target :: Block ( lir:: BranchEdge { target : skip_block, args : vec ! [ ] } ) ;
1605+
1606+ asm_comment ! ( asm, "guard: skip inline profiling if self-disabled" ) ;
1607+ let payload_addr = asm. load ( Opnd :: UImm ( jit. payload_ptr as u64 ) ) ;
1608+ let offset = std:: mem:: offset_of!( crate :: payload:: IseqPayload , no_profile_send_hits) as i32 ;
1609+ let hits = asm. load ( Opnd :: mem ( 64 , payload_addr, offset) ) ;
1610+ asm. cmp ( hits, Opnd :: UImm ( threshold) ) ;
1611+ asm. jge ( jit, skip_edge ( ) ) ;
1612+
1613+ asm_comment ! ( asm, "inline type feedback for NoProfile send" ) ;
1614+ asm_ccall ! ( asm, rb_zjit_inline_profile_send, Opnd :: UImm ( jit. payload_ptr as u64 ) , Opnd :: UImm ( insn_idx) , recv_opnd, Opnd :: UImm ( n_operands) ) ;
1615+
1616+ asm. jmp ( skip_edge ( ) ) ;
1617+ asm. set_current_block ( skip_block) ;
1618+ let label = jit. get_label ( asm, skip_block, hir_block_id) ;
1619+ asm. write_label ( label) ;
1620+ }
1621+
14831622fn gen_send (
14841623 jit : & mut JITState ,
14851624 asm : & mut Assembler ,
14861625 cd : * const rb_call_data ,
1626+ recv : InsnId ,
14871627 blockiseq : IseqPtr ,
14881628 state : & FrameState ,
14891629 reason : SendFallbackReason ,
14901630) -> lir:: Opnd {
14911631 gen_incr_send_fallback_counter ( asm, reason) ;
14921632
1633+ // Inline type feedback for NoProfile sends
1634+ if matches ! ( reason, SendFallbackReason :: SendNoProfiles ) && jit. should_emit_inline_profiling {
1635+ let recv_opnd = jit. get_opnd ( recv) ;
1636+ let n_operands = unsafe { vm_ci_argc ( ( * cd) . ci ) } as u64 + 1 ;
1637+ gen_guarded_inline_profile ( jit, asm, recv_opnd, state. insn_idx as u64 , n_operands) ;
1638+ }
1639+
14931640 gen_prepare_non_leaf_call ( jit, asm, state) ;
14941641 asm_comment ! ( asm, "call #{} with dynamic dispatch" , ruby_call_method_name( cd) ) ;
14951642 unsafe extern "C" {
@@ -1531,11 +1678,19 @@ fn gen_send_without_block(
15311678 jit : & mut JITState ,
15321679 asm : & mut Assembler ,
15331680 cd : * const rb_call_data ,
1681+ recv : InsnId ,
15341682 state : & FrameState ,
15351683 reason : SendFallbackReason ,
15361684) -> lir:: Opnd {
15371685 gen_incr_send_fallback_counter ( asm, reason) ;
15381686
1687+ // Inline type feedback for NoProfile sends
1688+ if matches ! ( reason, SendFallbackReason :: SendWithoutBlockNoProfiles ) && jit. should_emit_inline_profiling {
1689+ let recv_opnd = jit. get_opnd ( recv) ;
1690+ let n_operands = unsafe { vm_ci_argc ( ( * cd) . ci ) } as u64 + 1 ;
1691+ gen_guarded_inline_profile ( jit, asm, recv_opnd, state. insn_idx as u64 , n_operands) ;
1692+ }
1693+
15391694 gen_prepare_non_leaf_call ( jit, asm, state) ;
15401695 asm_comment ! ( asm, "call #{} with dynamic dispatch" , ruby_call_method_name( cd) ) ;
15411696 unsafe extern "C" {
0 commit comments