Skip to content

Commit 9626d0b

Browse files
committed
ZJIT: x64: Prefer 7-byte sign extending mov over 10-byte movabs
Relevant for small negative immediates. Previously:

    # Insn: v16 SetLocal l1, EP@3, v10
    mov rsi, qword ptr [r13 + 0x20]
    mov rsi, qword ptr [rsi - 8]
    and rsi, 0xfffffffffffffffc
    # call rb_vm_env_write
    push rdi
    push rdi
    mov rdx, rdi
    mov rdi, rsi
    movabs rsi, 0xfffffffffffffffd
1 parent 9bd9f26 commit 9626d0b

2 files changed

Lines changed: 46 additions & 59 deletions

File tree

zjit/src/asm/x86_64/mod.rs

Lines changed: 38 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ impl X86Reg {
102102
reg_no: self.reg_no
103103
}
104104
}
105+
106+
pub fn rex_needed(&self) -> bool {
107+
self.reg_no > 7 || self.num_bits == 8 && self.reg_no >= 4
108+
}
105109
}
106110

107111
impl X86Opnd {
@@ -110,7 +114,7 @@ impl X86Opnd {
110114
X86Opnd::None => false,
111115
X86Opnd::Imm(_) => false,
112116
X86Opnd::UImm(_) => false,
113-
X86Opnd::Reg(reg) => reg.reg_no > 7 || reg.num_bits == 8 && reg.reg_no >= 4,
117+
X86Opnd::Reg(reg) => reg.rex_needed(),
114118
X86Opnd::Mem(mem) => mem.base_reg_no > 7 || (mem.idx_reg_no.unwrap_or(0) > 7),
115119
X86Opnd::IPRel(_) => false
116120
}
@@ -923,60 +927,43 @@ pub fn lea(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
923927

924928
/// mov - Data move operation
925929
pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
926-
match (dst, src) {
927-
// R + Imm
928-
(X86Opnd::Reg(reg), X86Opnd::Imm(imm)) => {
929-
assert!(imm.num_bits <= reg.num_bits);
930-
931-
// In case the source immediate could be zero extended to be 64
932-
// bit, we can use the 32-bit operands version of the instruction.
933-
// For example, we can turn mov(rax, 0x34) into the equivalent
934-
// mov(eax, 0x34).
935-
if (reg.num_bits == 64) && (imm.value > 0) && (imm.num_bits <= 32) {
936-
if dst.rex_needed() {
937-
write_rex(cb, false, 0, 0, reg.reg_no);
938-
}
939-
write_opcode(cb, 0xB8, reg);
940-
cb.write_int(imm.value as u64, 32);
941-
} else {
942-
if reg.num_bits == 16 {
943-
cb.write_byte(0x66);
944-
}
945-
946-
if dst.rex_needed() || reg.num_bits == 64 {
947-
write_rex(cb, reg.num_bits == 64, 0, 0, reg.reg_no);
948-
}
930+
fn emit_reg_imm(cb: &mut CodeBlock, reg: X86Reg, imm: X86Imm) {
931+
// In case the source immediate could be zero extended to be 64
932+
// bit, we can use the 32-bit operands version of the instruction.
933+
// For example, we can turn mov(rax, 0x34) into the equivalent
934+
// mov(eax, 0x34).
935+
if (reg.num_bits == 64) && u32::try_from(imm.value).is_ok() {
936+
if reg.rex_needed() {
937+
write_rex(cb, false, 0, 0, reg.reg_no);
938+
}
939+
write_opcode(cb, 0xB8, reg);
940+
cb.write_int(imm.value as u64, 32);
941+
} else if reg.num_bits == 64 && imm.num_bits <= 32 {
942+
// Use 32-to-64 bit sign-extension when possible
943+
write_rm(cb, false, true, X86Opnd::None, X86Opnd::Reg(reg), Some(0), &[0xc7]);
944+
cb.write_int(imm.value as u64, 32);
945+
} else {
946+
if reg.num_bits == 16 {
947+
cb.write_byte(0x66);
948+
}
949949

950-
write_opcode(cb, if reg.num_bits == 8 { 0xb0 } else { 0xb8 }, reg);
951-
cb.write_int(imm.value as u64, reg.num_bits.into());
950+
if reg.rex_needed() || reg.num_bits == 64 {
951+
write_rex(cb, reg.num_bits == 64, 0, 0, reg.reg_no);
952952
}
953-
},
953+
954+
write_opcode(cb, if reg.num_bits == 8 { 0xb0 } else { 0xb8 }, reg);
955+
cb.write_int(imm.value as u64, reg.num_bits.into());
956+
}
957+
}
958+
match (dst, src) {
959+
// R + Imm
960+
(X86Opnd::Reg(reg), X86Opnd::Imm(imm)) => emit_reg_imm(cb, reg, imm),
954961
// R + UImm
955962
(X86Opnd::Reg(reg), X86Opnd::UImm(uimm)) => {
956-
assert!(uimm.num_bits <= reg.num_bits);
957-
958-
// In case the source immediate could be zero extended to be 64
959-
// bit, we can use the 32-bit operands version of the instruction.
960-
// For example, we can turn mov(rax, 0x34) into the equivalent
961-
// mov(eax, 0x34).
962-
if (reg.num_bits == 64) && (uimm.value <= u32::MAX.into()) {
963-
if dst.rex_needed() {
964-
write_rex(cb, false, 0, 0, reg.reg_no);
965-
}
966-
write_opcode(cb, 0xB8, reg);
967-
cb.write_int(uimm.value, 32);
968-
} else {
969-
if reg.num_bits == 16 {
970-
cb.write_byte(0x66);
971-
}
972-
973-
if dst.rex_needed() || reg.num_bits == 64 {
974-
write_rex(cb, reg.num_bits == 64, 0, 0, reg.reg_no);
975-
}
976-
977-
write_opcode(cb, if reg.num_bits == 8 { 0xb0 } else { 0xb8 }, reg);
978-
cb.write_int(uimm.value, reg.num_bits.into());
979-
}
963+
// u64->i64 type cast is a bit pattern no-op
964+
let value: u64 = uimm.value;
965+
let value = value as i64;
966+
emit_reg_imm(cb, reg, X86Imm { num_bits: imm_num_bits(value), value });
980967
},
981968
// M + Imm
982969
(X86Opnd::Mem(mem), X86Opnd::Imm(imm)) => {

zjit/src/asm/x86_64/tests.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -373,13 +373,13 @@ fn test_mov() {
373373
0x0: mov edx, dword ptr [rbx + 0x80]
374374
0x0: mov rax, qword ptr [rsp + 4]
375375
0x0: mov r8d, 0x34
376-
0x0: movabs r8, 0x80000000
377-
0x0: movabs r8, 0xffffffffffffffff
376+
0x0: mov r8d, 0x80000000
377+
0x0: mov r8, 0xffffffffffffffff
378378
0x0: mov eax, 0x34
379379
0x0: movabs rax, 0xffc0000000000002
380-
0x0: movabs rax, 0x80000000
381-
0x0: movabs rax, 0xffffffffffffffcc
382-
0x0: movabs rax, 0xffffffffffffffff
380+
0x0: mov eax, 0x80000000
381+
0x0: mov rax, 0xffffffffffffffcc
382+
0x0: mov rax, 0xffffffffffffffff
383383
0x0: mov cl, r9b
384384
0x0: mov rbx, rax
385385
0x0: mov rdi, rbx
@@ -488,16 +488,16 @@ fn test_mov_unsigned() {
488488
0x0: mov eax, 1
489489
0x0: mov eax, 0xffffffff
490490
0x0: movabs rax, 0x100000000
491-
0x0: movabs rax, 0xffffffffffffffff
492-
0x0: movabs r8, 0xffffffffffffffff
491+
0x0: mov rax, 0xffffffffffffffff
492+
0x0: mov r8, 0xffffffffffffffff
493493
0x0: mov r8b, 1
494494
0x0: mov r8b, 0xff
495495
0x0: mov r8w, 1
496496
0x0: mov r8w, 0xffff
497497
0x0: mov r8d, 1
498498
0x0: mov r8d, 0xffffffff
499499
0x0: mov r8d, 1
500-
0x0: movabs r8, 0xffffffffffffffff
500+
0x0: mov r8, 0xffffffffffffffff
501501
");
502502

503503
assert_snapshot!(hexdumps!(cb01, cb02, cb03, cb04, cb05, cb06, cb07, cb08, cb09, cb10, cb11, cb12, cb13, cb14, cb15, cb16, cb17, cb18, cb19, cb20, cb21), @"

0 commit comments

Comments
 (0)