Skip to content

Commit 7444096

Browse files
authored
ZJIT: Only load non-vreg opnds (ruby#16486)
When we call `asm.load`, we are often passing in a VReg, and that causes extra loads when we lower to machine code. I'd like to emit a load only in the case that the operand _isn't_ already a VReg. For example, this code:

```ruby
class Foo
  def initialize
    @foo = 123
  end

  def foo
    @foo
  end
end

foo = Foo.new
5.times { foo.foo }
```

Before this patch, the machine code for `LoadField` looks like this:

```
# Insn: v18 LoadField v17, :_shape_id@0x4
# Load field id=_shape_id offset=4
0x121308320: mov x1, x0
0x121308324: ldur w1, [x1, #4]
```

Now it looks like this:

```
# Insn: v18 LoadField v17, :_shape_id@0x4
# Load field id=_shape_id offset=4
0x12339c320: ldur w1, [x0, #4]
```

We were able to eliminate a reg-reg copy.
1 parent 240c9d6 commit 7444096

1 file changed

Lines changed: 25 additions & 34 deletions

File tree

zjit/src/codegen.rs

Lines changed: 25 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,14 +1290,14 @@ fn gen_load_self() -> Opnd {
12901290
fn gen_load_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32, return_type: Type) -> Opnd {
12911291
gen_incr_counter(asm, Counter::load_field_count);
12921292
asm_comment!(asm, "Load field id={} offset={}", id.contents_lossy(), offset);
1293-
let recv = asm.load(recv);
1293+
let recv = asm.load_mem(recv);
12941294
asm.load(Opnd::mem(return_type.num_bits(), recv, offset))
12951295
}
12961296

12971297
fn gen_store_field(asm: &mut Assembler, recv: Opnd, id: ID, offset: i32, val: Opnd, val_type: Type) {
12981298
gen_incr_counter(asm, Counter::store_field_count);
12991299
asm_comment!(asm, "Store field id={} offset={}", id.contents_lossy(), offset);
1300-
let recv = asm.load(recv);
1300+
let recv = asm.load_mem(recv);
13011301
asm.store(Opnd::mem(val_type.num_bits(), recv, offset), val);
13021302
}
13031303

@@ -1306,7 +1306,7 @@ fn gen_write_barrier(jit: &mut JITState, asm: &mut Assembler, recv: Opnd, val: O
13061306
// rb_obj_written() does: if (!RB_SPECIAL_CONST_P(val)) { rb_gc_writebarrier(recv, val); }
13071307
if !val_type.is_immediate() {
13081308
asm_comment!(asm, "Write barrier");
1309-
let recv = asm.load(recv);
1309+
let recv = asm.load_mem(recv);
13101310

13111311
// Create a result block that all paths converge to
13121312
let hir_block_id = asm.current_block().hir_block_id;
@@ -1766,8 +1766,8 @@ fn gen_array_aref(
17661766
array: Opnd,
17671767
index: Opnd,
17681768
) -> lir::Opnd {
1769-
let unboxed_idx = asm.load(index);
1770-
let array = asm.load(array);
1769+
let unboxed_idx = asm.load_mem(index);
1770+
let array = asm.load_mem(array);
17711771
let array_ptr = gen_array_ptr(asm, array);
17721772
let elem_offset = asm.lshift(unboxed_idx, Opnd::UImm(SIZEOF_VALUE.trailing_zeros() as u64));
17731773
let elem_ptr = asm.add(array_ptr, elem_offset);
@@ -1780,8 +1780,8 @@ fn gen_array_aset(
17801780
index: Opnd,
17811781
val: Opnd,
17821782
) {
1783-
let unboxed_idx = asm.load(index);
1784-
let array = asm.load(array);
1783+
let unboxed_idx = asm.load_mem(index);
1784+
let array = asm.load_mem(array);
17851785
let array_ptr = gen_array_ptr(asm, array);
17861786
let elem_offset = asm.lshift(unboxed_idx, Opnd::UImm(SIZEOF_VALUE.trailing_zeros() as u64));
17871787
let elem_ptr = asm.add(array_ptr, elem_offset);
@@ -1794,7 +1794,7 @@ fn gen_array_pop(asm: &mut Assembler, array: Opnd, state: &FrameState) -> lir::O
17941794
}
17951795

17961796
fn gen_array_length(asm: &mut Assembler, array: Opnd) -> lir::Opnd {
1797-
let array = asm.load(array);
1797+
let array = asm.load_mem(array);
17981798
let flags = Opnd::mem(VALUE_BITS, array, RUBY_OFFSET_RBASIC_FLAGS);
17991799
let embedded_len = asm.and(flags, (RARRAY_EMBED_LEN_MASK as u64).into());
18001800
let embedded_len = asm.rshift(embedded_len, (RARRAY_EMBED_LEN_SHIFT as u64).into());
@@ -1959,10 +1959,7 @@ fn gen_is_a(jit: &mut JITState, asm: &mut Assembler, obj: Opnd, class: Opnd) ->
19591959
args: vec![v],
19601960
});
19611961

1962-
let val = match obj {
1963-
Opnd::Reg(_) | Opnd::VReg { .. } => obj,
1964-
_ => asm.load(obj),
1965-
};
1962+
let val = asm.load_mem(obj);
19661963

19671964
// Immediate → definitely not String/Array/Hash
19681965
asm.test(val, Opnd::UImm(RUBY_IMMEDIATE_MASK as u64));
@@ -2252,7 +2249,7 @@ fn gen_box_bool(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd {
22522249

22532250
fn gen_box_fixnum(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, state: &FrameState) -> lir::Opnd {
22542251
// Load the value, then test for overflow and tag it
2255-
let val = asm.load(val);
2252+
let val = asm.load_mem(val);
22562253
let shifted = asm.lshift(val, Opnd::UImm(1));
22572254
asm.jo(jit, side_exit(jit, state, BoxFixnumOverflow));
22582255
asm.or(shifted, Opnd::UImm(RUBY_FIXNUM_FLAG as u64))
@@ -2315,10 +2312,7 @@ fn gen_has_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, ty: Typ
23152312

23162313
// If val isn't in a register, load it to use it as the base of Opnd::mem later.
23172314
// TODO: Max thinks codegen should not care about the shapes of the operands except to create them. (Shopify/ruby#685)
2318-
let val = match val {
2319-
Opnd::Reg(_) | Opnd::VReg { .. } => val,
2320-
_ => asm.load(val),
2321-
};
2315+
let val = asm.load_mem(val);
23222316

23232317
// Immediate → definitely not the class
23242318
asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into());
@@ -2380,10 +2374,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard
23802374

23812375
// If val isn't in a register, load it to use it as the base of Opnd::mem later.
23822376
// TODO: Max thinks codegen should not care about the shapes of the operands except to create them. (Shopify/ruby#685)
2383-
let val = match val {
2384-
Opnd::Reg(_) | Opnd::VReg { .. } => val,
2385-
_ => asm.load(val),
2386-
};
2377+
let val = asm.load_mem(val);
23872378

23882379
// Check if it's a special constant
23892380
let side_exit = side_exit(jit, state, GuardType(guard_type));
@@ -2410,10 +2401,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard
24102401
asm.cmp(val, Qfalse.into());
24112402
asm.je(jit, side.clone());
24122403

2413-
let val = match val {
2414-
Opnd::Reg(_) | Opnd::VReg { .. } => val,
2415-
_ => asm.load(val),
2416-
};
2404+
let val = asm.load_mem(val);
24172405

24182406
let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
24192407
let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
@@ -2430,10 +2418,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard
24302418
asm.cmp(val, Qfalse.into());
24312419
asm.je(jit, side.clone());
24322420

2433-
let val = match val {
2434-
Opnd::Reg(_) | Opnd::VReg { .. } => val,
2435-
_ => asm.load(val),
2436-
};
2421+
let val = asm.load_mem(val);
24372422

24382423
let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
24392424
let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
@@ -2471,10 +2456,7 @@ fn gen_guard_type_not(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, g
24712456
asm.cmp(val, Qfalse.into());
24722457
asm.je(jit, cont_edge());
24732458

2474-
let val = match val {
2475-
Opnd::Reg(_) | Opnd::VReg { .. } => val,
2476-
_ => asm.load(val),
2477-
};
2459+
let val = asm.load_mem(val);
24782460

24792461
let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
24802462
let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
@@ -3118,7 +3100,7 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>
31183100
// Generate RSTRING_PTR
31193101
fn get_string_ptr(asm: &mut Assembler, string: Opnd) -> Opnd {
31203102
asm_comment!(asm, "get string pointer for embedded or heap");
3121-
let string = asm.load(string);
3103+
let string = asm.load_mem(string);
31223104
let flags = Opnd::mem(VALUE_BITS, string, RUBY_OFFSET_RBASIC_FLAGS);
31233105
asm.test(flags, (RSTRING_NOEMBED as u64).into());
31243106
let heap_ptr = asm.load(Opnd::mem(
@@ -3184,6 +3166,15 @@ fn aligned_stack_bytes(num_slots: usize) -> usize {
31843166
}
31853167

31863168
impl Assembler {
3169+
/// Emits a load for memory-based operands and returns a VReg;
3170+
/// otherwise returns `recv` unchanged (it is already a register or VReg).
3171+
fn load_mem(&mut self, recv: Opnd) -> Opnd {
3172+
match recv {
3173+
Opnd::VReg { .. } | Opnd::Reg(_) => recv,
3174+
_ => self.load(recv),
3175+
}
3176+
}
3177+
31873178
/// Make a C call while marking the start and end positions for IseqCall
31883179
fn ccall_with_iseq_call(&mut self, fptr: *const u8, opnds: Vec<Opnd>, iseq_call: &IseqCallRef) -> Opnd {
31893180
// We need to create our own branch rc objects so that we can move the closure below

0 commit comments

Comments
 (0)