Skip to content

Commit ae4689e

Browse files
committed
ZJIT: Avoid redundant SP save in codegen
1 parent 66b2cc3 commit ae4689e

1 file changed

Lines changed: 19 additions & 10 deletions

File tree

zjit/src/codegen.rs

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -789,8 +789,9 @@ fn gen_ccall_with_frame(
789789
let caller_stack_size = state.stack().len() - args_with_recv_len;
790790

791791
// Can't use gen_prepare_non_leaf_call() because we need to adjust the SP
792-
// to account for the receiver and arguments (and block arguments if any)
793-
gen_prepare_call_with_gc(asm, state, false);
792+
// to account for the receiver and arguments (and block arguments if any).
793+
// Skip SP save in gen_prepare_call_with_gc since we call gen_save_sp right after.
794+
gen_prepare_call_with_gc(asm, state, false, true);
794795
gen_save_sp(asm, caller_stack_size);
795796
gen_spill_stack(jit, asm, state);
796797
gen_spill_locals(jit, asm, state);
@@ -874,8 +875,9 @@ fn gen_ccall_variadic(
874875
let caller_stack_size = state.stack_size() - args_with_recv_len;
875876

876877
// Can't use gen_prepare_non_leaf_call() because we need to adjust the SP
877-
// to account for the receiver and arguments (like gen_ccall_with_frame does)
878-
gen_prepare_call_with_gc(asm, state, false);
878+
// to account for the receiver and arguments (like gen_ccall_with_frame does).
879+
// Skip SP save in gen_prepare_call_with_gc since we call gen_save_sp right after.
880+
gen_prepare_call_with_gc(asm, state, false, true);
879881
gen_save_sp(asm, caller_stack_size);
880882
gen_spill_stack(jit, asm, state);
881883
gen_spill_locals(jit, asm, state);
@@ -1303,8 +1305,9 @@ fn gen_send_without_block_direct(
13031305
let stack_growth = state.stack_size() + local_size + unsafe { get_iseq_body_stack_max(iseq) }.to_usize();
13041306
gen_stack_overflow_check(jit, asm, state, stack_growth);
13051307

1306-
// Save cfp->pc and cfp->sp for the caller frame
1307-
gen_prepare_call_with_gc(asm, state, false);
1308+
// Save cfp->pc and cfp->sp for the caller frame.
1309+
// Skip SP save in gen_prepare_call_with_gc since we call gen_save_sp right after.
1310+
gen_prepare_call_with_gc(asm, state, false, true);
13081311
// Special SP math. Can't use gen_prepare_non_leaf_call
13091312
gen_save_sp(asm, state.stack().len() - args.len() - 1); // -1 for receiver
13101313

@@ -2016,15 +2019,21 @@ fn gen_incr_send_fallback_counter(asm: &mut Assembler, reason: SendFallbackReaso
20162019
/// because the backend spills all live registers onto the C stack on CCall.
20172020
/// However, to avoid marking uninitialized stack slots, this also updates SP,
20182021
/// which may have cfp->sp for a past frame or a past non-leaf call.
2019-
fn gen_prepare_call_with_gc(asm: &mut Assembler, state: &FrameState, leaf: bool) {
2022+
///
2023+
/// Set `skip_sp_save` to true if the caller will call gen_save_sp() immediately
2024+
/// after with a different stack size (e.g., gen_ccall_with_frame adjusts SP to
2025+
/// exclude receiver and arguments).
2026+
fn gen_prepare_call_with_gc(asm: &mut Assembler, state: &FrameState, leaf: bool, skip_sp_save: bool) {
20202027
let opcode: usize = state.get_opcode().try_into().unwrap();
20212028
let next_pc: *const VALUE = unsafe { state.pc.offset(insn_len(opcode) as isize) };
20222029

20232030
gen_incr_counter(asm, Counter::vm_write_pc_count);
20242031
asm_comment!(asm, "save PC to CFP");
20252032
asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(next_pc));
20262033

2027-
gen_save_sp(asm, state.stack_size());
2034+
if !skip_sp_save {
2035+
gen_save_sp(asm, state.stack_size());
2036+
}
20282037
if leaf {
20292038
asm.expect_leaf_ccall(state.stack_size());
20302039
}
@@ -2044,7 +2053,7 @@ fn gen_prepare_leaf_call_with_gc(asm: &mut Assembler, state: &FrameState) {
20442053
// We use state.without_stack() to pass stack_size=0 to gen_save_sp() because we don't write
20452054
// VM stack slots on leaf calls, which leaves those stack slots uninitialized. ZJIT keeps
20462055
// live objects on the C stack, so they are protected from GC properly.
2047-
gen_prepare_call_with_gc(asm, &state.without_stack(), true);
2056+
gen_prepare_call_with_gc(asm, &state.without_stack(), true, false);
20482057
}
20492058

20502059
/// Save the current SP on the CFP
@@ -2087,7 +2096,7 @@ fn gen_prepare_non_leaf_call(jit: &JITState, asm: &mut Assembler, state: &FrameS
20872096
// TODO: Lazily materialize caller frames when needed
20882097
// Save PC for backtraces and allocation tracing
20892098
// and SP to avoid marking uninitialized stack slots
2090-
gen_prepare_call_with_gc(asm, state, false);
2099+
gen_prepare_call_with_gc(asm, state, false, false);
20912100

20922101
// Spill the virtual stack in case it raises an exception
20932102
// and the interpreter uses the stack for handling the exception

0 commit comments

Comments (0)