Skip to content

Commit 3ab865a

Browse files
authored
implement winch x64 cmpxchg (#10039)
1 parent 4841cc0 commit 3ab865a

13 files changed

Lines changed: 441 additions & 0 deletions
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
4+
;; Compare-exchange on the 16-bit value at constant address 0 of a shared memory
;; (expected 42, replacement 1337); the old value is returned zero-extended to i32.
(module
5+
(memory 1 1 shared)
6+
(func (export "_start") (result i32)
7+
(i32.atomic.rmw16.cmpxchg_u (i32.const 0) (i32.const 42) (i32.const 1337))))
8+
;; wasm[0]::function[0]:
9+
;; pushq %rbp
10+
;; movq %rsp, %rbp
11+
;; movq 8(%rdi), %r11
12+
;; movq 0x10(%r11), %r11
13+
;; addq $0x18, %r11
14+
;; cmpq %rsp, %r11
15+
;; ja 0x82
16+
;; 1c: movq %rdi, %r14
17+
;; subq $0x10, %rsp
18+
;; movq %rdi, 8(%rsp)
19+
;; movq %rsi, (%rsp)
20+
;; movl $0x539, %eax
21+
;; movl $0x2a, %ecx
22+
;; movl $0, %edx
23+
;; andw $1, %dx
24+
;; cmpw $0, %dx
25+
;; jne 0x84
26+
;; 49: movl $0, %edx
27+
;; movq 0x58(%r14), %r11
28+
;; movq (%r11), %rbx
29+
;; addq %rdx, %rbx
30+
;; subq $4, %rsp
31+
;; movl %ecx, (%rsp)
32+
;; subq $4, %rsp
33+
;; movl %eax, (%rsp)
34+
;; movl (%rsp), %ecx
35+
;; addq $4, %rsp
36+
;; movl (%rsp), %eax
37+
;; addq $4, %rsp
38+
;; lock cmpxchgw %cx, (%rbx)
39+
;; movzwl %ax, %eax
40+
;; addq $0x10, %rsp
41+
;; popq %rbp
42+
;; retq
43+
;; 82: ud2
44+
;; 84: ud2
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
4+
;; Compare-exchange on the 8-bit value at constant address 0 of a shared memory
;; (expected 42, replacement 1337); the old value is returned zero-extended to i32.
(module
5+
(memory 1 1 shared)
6+
(func (export "_start") (result i32)
7+
(i32.atomic.rmw8.cmpxchg_u (i32.const 0) (i32.const 42) (i32.const 1337))))
8+
;; wasm[0]::function[0]:
9+
;; pushq %rbp
10+
;; movq %rsp, %rbp
11+
;; movq 8(%rdi), %r11
12+
;; movq 0x10(%r11), %r11
13+
;; addq $0x18, %r11
14+
;; cmpq %rsp, %r11
15+
;; ja 0x6e
16+
;; 1c: movq %rdi, %r14
17+
;; subq $0x10, %rsp
18+
;; movq %rdi, 8(%rsp)
19+
;; movq %rsi, (%rsp)
20+
;; movl $0x539, %eax
21+
;; movl $0x2a, %ecx
22+
;; movl $0, %edx
23+
;; movq 0x58(%r14), %r11
24+
;; movq (%r11), %rbx
25+
;; addq %rdx, %rbx
26+
;; subq $4, %rsp
27+
;; movl %ecx, (%rsp)
28+
;; subq $4, %rsp
29+
;; movl %eax, (%rsp)
30+
;; movl (%rsp), %ecx
31+
;; addq $4, %rsp
32+
;; movl (%rsp), %eax
33+
;; addq $4, %rsp
34+
;; lock cmpxchgb %cl, (%rbx)
35+
;; movzbl %al, %eax
36+
;; addq $0x10, %rsp
37+
;; popq %rbp
38+
;; retq
39+
;; 6e: ud2
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
4+
;; Full-width (32-bit) i32 compare-exchange at constant address 0 of a shared memory
;; (expected 42, replacement 1337); the old value is returned.
(module
5+
(memory 1 1 shared)
6+
(func (export "_start") (result i32)
7+
(i32.atomic.rmw.cmpxchg (i32.const 0) (i32.const 42) (i32.const 1337))))
8+
;; wasm[0]::function[0]:
9+
;; pushq %rbp
10+
;; movq %rsp, %rbp
11+
;; movq 8(%rdi), %r11
12+
;; movq 0x10(%r11), %r11
13+
;; addq $0x18, %r11
14+
;; cmpq %rsp, %r11
15+
;; ja 0x7c
16+
;; 1c: movq %rdi, %r14
17+
;; subq $0x10, %rsp
18+
;; movq %rdi, 8(%rsp)
19+
;; movq %rsi, (%rsp)
20+
;; movl $0x539, %eax
21+
;; movl $0x2a, %ecx
22+
;; movl $0, %edx
23+
;; andl $3, %edx
24+
;; cmpl $0, %edx
25+
;; jne 0x7e
26+
;; 47: movl $0, %edx
27+
;; movq 0x58(%r14), %r11
28+
;; movq (%r11), %rbx
29+
;; addq %rdx, %rbx
30+
;; subq $4, %rsp
31+
;; movl %ecx, (%rsp)
32+
;; subq $4, %rsp
33+
;; movl %eax, (%rsp)
34+
;; movl (%rsp), %ecx
35+
;; addq $4, %rsp
36+
;; movl (%rsp), %eax
37+
;; addq $4, %rsp
38+
;; lock cmpxchgl %ecx, (%rbx)
39+
;; addq $0x10, %rsp
40+
;; popq %rbp
41+
;; retq
42+
;; 7c: ud2
43+
;; 7e: ud2
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
4+
;; Compare-exchange on the 16-bit value at constant address 0 of a shared memory
;; (expected 42, replacement 1337); the old value is returned zero-extended to i64.
(module
5+
(memory 1 1 shared)
6+
(func (export "_start") (result i64)
7+
(i64.atomic.rmw16.cmpxchg_u (i32.const 0) (i64.const 42) (i64.const 1337))))
8+
;; wasm[0]::function[0]:
9+
;; pushq %rbp
10+
;; movq %rsp, %rbp
11+
;; movq 8(%rdi), %r11
12+
;; movq 0x10(%r11), %r11
13+
;; addq $0x20, %r11
14+
;; cmpq %rsp, %r11
15+
;; ja 0x6f
16+
;; 1c: movq %rdi, %r14
17+
;; subq $0x10, %rsp
18+
;; movq %rdi, 8(%rsp)
19+
;; movq %rsi, (%rsp)
20+
;; movq $0x539, %rax
21+
;; movq $0x2a, %rcx
22+
;; movl $0, %edx
23+
;; andw $1, %dx
24+
;; cmpw $0, %dx
25+
;; jne 0x71
26+
;; 4d: movl $0, %edx
27+
;; movq 0x58(%r14), %r11
28+
;; movq (%r11), %rbx
29+
;; addq %rdx, %rbx
30+
;; pushq %rcx
31+
;; pushq %rax
32+
;; popq %rcx
33+
;; popq %rax
34+
;; lock cmpxchgw %cx, (%rbx)
35+
;; movzwq %ax, %rax
36+
;; addq $0x10, %rsp
37+
;; popq %rbp
38+
;; retq
39+
;; 6f: ud2
40+
;; 71: ud2
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
4+
;; Compare-exchange on the 32-bit value at constant address 0 of a shared memory
;; (expected 42, replacement 1337); the old value is returned zero-extended to i64.
(module
5+
(memory 1 1 shared)
6+
(func (export "_start") (result i64)
7+
(i64.atomic.rmw32.cmpxchg_u (i32.const 0) (i64.const 42) (i64.const 1337))))
8+
;; wasm[0]::function[0]:
9+
;; pushq %rbp
10+
;; movq %rsp, %rbp
11+
;; movq 8(%rdi), %r11
12+
;; movq 0x10(%r11), %r11
13+
;; addq $0x20, %r11
14+
;; cmpq %rsp, %r11
15+
;; ja 0x68
16+
;; 1c: movq %rdi, %r14
17+
;; subq $0x10, %rsp
18+
;; movq %rdi, 8(%rsp)
19+
;; movq %rsi, (%rsp)
20+
;; movq $0x539, %rax
21+
;; movq $0x2a, %rcx
22+
;; movl $0, %edx
23+
;; andl $3, %edx
24+
;; cmpl $0, %edx
25+
;; jne 0x6a
26+
;; 4b: movl $0, %edx
27+
;; movq 0x58(%r14), %r11
28+
;; movq (%r11), %rbx
29+
;; addq %rdx, %rbx
30+
;; pushq %rcx
31+
;; pushq %rax
32+
;; popq %rcx
33+
;; popq %rax
34+
;; lock cmpxchgl %ecx, (%rbx)
35+
;; addq $0x10, %rsp
36+
;; popq %rbp
37+
;; retq
38+
;; 68: ud2
39+
;; 6a: ud2
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
4+
;; Compare-exchange on the 8-bit value at constant address 0 of a shared memory
;; (expected 42, replacement 1337); the old value is returned zero-extended to i64.
(module
5+
(memory 1 1 shared)
6+
(func (export "_start") (result i64)
7+
(i64.atomic.rmw8.cmpxchg_u (i32.const 0) (i64.const 42) (i64.const 1337))))
8+
;; wasm[0]::function[0]:
9+
;; pushq %rbp
10+
;; movq %rsp, %rbp
11+
;; movq 8(%rdi), %r11
12+
;; movq 0x10(%r11), %r11
13+
;; addq $0x20, %r11
14+
;; cmpq %rsp, %r11
15+
;; ja 0x5b
16+
;; 1c: movq %rdi, %r14
17+
;; subq $0x10, %rsp
18+
;; movq %rdi, 8(%rsp)
19+
;; movq %rsi, (%rsp)
20+
;; movq $0x539, %rax
21+
;; movq $0x2a, %rcx
22+
;; movl $0, %edx
23+
;; movq 0x58(%r14), %r11
24+
;; movq (%r11), %rbx
25+
;; addq %rdx, %rbx
26+
;; pushq %rcx
27+
;; pushq %rax
28+
;; popq %rcx
29+
;; popq %rax
30+
;; lock cmpxchgb %cl, (%rbx)
31+
;; movzbq %al, %rax
32+
;; addq $0x10, %rsp
33+
;; popq %rbp
34+
;; retq
35+
;; 5b: ud2
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
4+
;; Full-width (64-bit) i64 compare-exchange at constant address 0 of a shared memory
;; (expected 42, replacement 1337); the old value is returned.
(module
5+
(memory 1 1 shared)
6+
(func (export "_start") (result i64)
7+
(i64.atomic.rmw.cmpxchg (i32.const 0) (i64.const 42) (i64.const 1337))))
8+
;; wasm[0]::function[0]:
9+
;; pushq %rbp
10+
;; movq %rsp, %rbp
11+
;; movq 8(%rdi), %r11
12+
;; movq 0x10(%r11), %r11
13+
;; addq $0x20, %r11
14+
;; cmpq %rsp, %r11
15+
;; ja 0x6b
16+
;; 1c: movq %rdi, %r14
17+
;; subq $0x10, %rsp
18+
;; movq %rdi, 8(%rsp)
19+
;; movq %rsi, (%rsp)
20+
;; movq $0x539, %rax
21+
;; movq $0x2a, %rcx
22+
;; movl $0, %edx
23+
;; andq $7, %rdx
24+
;; cmpq $0, %rdx
25+
;; jne 0x6d
26+
;; 4d: movl $0, %edx
27+
;; movq 0x58(%r14), %r11
28+
;; movq (%r11), %rbx
29+
;; addq %rdx, %rbx
30+
;; pushq %rcx
31+
;; pushq %rax
32+
;; popq %rcx
33+
;; popq %rax
34+
;; lock cmpxchgq %rcx, (%rbx)
35+
;; addq $0x10, %rsp
36+
;; popq %rbp
37+
;; retq
38+
;; 6b: ud2
39+
;; 6d: ud2

winch/codegen/src/codegen/mod.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1384,6 +1384,39 @@ where
13841384

13851385
Ok(())
13861386
}
1387+
1388+
pub(crate) fn emit_atomic_cmpxchg(
1389+
&mut self,
1390+
arg: &MemArg,
1391+
size: OperandSize,
1392+
extend: Option<Extend<Zero>>,
1393+
) -> Result<()> {
1394+
// Emission for this instruction is a bit trickier. The address for the CAS is the 3rd from
1395+
// the top of the stack, and we must emit instruction to compute the actual address with
1396+
// `emit_compute_heap_address_align_checked`, while we still have access to self. However,
1397+
// some ISAs have requirements with regard to the registers used for some arguments, so we
1398+
// need to pass the context to the masm. To solve this issue, we pop the two first
1399+
// arguments from the stack, compute the address, push back the arguments, and hand over
1400+
// the control to masm. The implementer of `atomic_cas` can expect to find `expected` and
1401+
// `replacement` at the top the context's stack.
1402+
1403+
// pop the args
1404+
let replacement = self.context.pop_to_reg(self.masm, None)?;
1405+
let expected = self.context.pop_to_reg(self.masm, None)?;
1406+
1407+
if let Some(addr) = self.emit_compute_heap_address_align_checked(arg, size)? {
1408+
// push back the args
1409+
self.context.stack.push(expected.into());
1410+
self.context.stack.push(replacement.into());
1411+
1412+
let src = self.masm.address_at_reg(addr, 0)?;
1413+
self.masm
1414+
.atomic_cas(&mut self.context, src, size, UNTRUSTED_FLAGS, extend)?;
1415+
1416+
self.context.free_reg(addr);
1417+
}
1418+
Ok(())
1419+
}
13871420
}
13881421

13891422
/// Returns the index of the [`ControlStackFrame`] for the given

winch/codegen/src/isa/aarch64/masm.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,17 @@ impl Masm for MacroAssembler {
928928
) -> Result<()> {
929929
bail!(CodeGenError::unimplemented_masm_instruction())
930930
}
931+
932+
fn atomic_cas(
933+
&mut self,
934+
_context: &mut CodeGenContext<Emission>,
935+
_addr: Self::Address,
936+
_size: OperandSize,
937+
_flags: MemFlags,
938+
_extend: Option<Extend<Zero>>,
939+
) -> Result<()> {
940+
Err(anyhow!(CodeGenError::unimplemented_masm_instruction()))
941+
}
931942
}
932943

933944
impl MacroAssembler {

winch/codegen/src/isa/x64/asm.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,6 +1196,32 @@ impl Assembler {
11961196
dst_old: dst.map(Into::into),
11971197
});
11981198
}
1199+
pub fn cmpxchg(
1200+
&mut self,
1201+
addr: Address,
1202+
expected: Reg,
1203+
replacement: Reg,
1204+
dst: WritableReg,
1205+
size: OperandSize,
1206+
flags: MemFlags,
1207+
) {
1208+
assert!(addr.is_offset());
1209+
let mem = Self::to_synthetic_amode(
1210+
&addr,
1211+
&mut self.pool,
1212+
&mut self.constants,
1213+
&mut self.buffer,
1214+
flags,
1215+
);
1216+
1217+
self.emit(Inst::LockCmpxchg {
1218+
ty: Type::int_with_byte_size(size.bytes() as _).unwrap(),
1219+
replacement: replacement.into(),
1220+
expected: expected.into(),
1221+
mem,
1222+
dst_old: dst.map(Into::into),
1223+
})
1224+
}
11991225

12001226
pub fn cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize) {
12011227
let imm = RegMemImm::imm(imm as u32);

0 commit comments

Comments
 (0)