Skip to content
This repository was archived by the owner on Sep 8, 2025. It is now read-only.

Commit 0ad4b70

Browse files
authored
Merge pull request #197 from alexcrichton/merge
Well, I thought I was done for today
2 parents 74ae184 + dbe58f3 commit 0ad4b70

5 files changed

Lines changed: 110 additions & 1 deletion

File tree

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5058,7 +5058,7 @@
50585058

50595059
(decl bitcast_gpr_to_xmm (u8 Gpr) Xmm)
50605060
(rule (bitcast_gpr_to_xmm 16 src)
5061-
(x64_pinsrw (xmm_uninit_value) src 0))
5061+
(x64_pinsrw (xmm_zero $I16X8) src 0))
50625062
(rule (bitcast_gpr_to_xmm 32 src)
50635063
(x64_movd_to_xmm src))
50645064
(rule (bitcast_gpr_to_xmm 64 src)

cranelift/filetests/filetests/isa/x64/bitcast.clif

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ block0(v0: i16):
3838
; movq %rsp, %rbp
3939
; block0:
4040
; uninit %xmm0
41+
; pxor %xmm0, %xmm0
4142
; pinsrw $0x0, %edi, %xmm0
4243
; movq %rbp, %rsp
4344
; popq %rbp
@@ -48,6 +49,7 @@ block0(v0: i16):
4849
; pushq %rbp
4950
; movq %rsp, %rbp
5051
; block1: ; offset 0x4
52+
; pxor %xmm0, %xmm0
5153
; pinsrw $0, %edi, %xmm0
5254
; movq %rbp, %rsp
5355
; popq %rbp
@@ -405,6 +407,7 @@ block0(v0: i16):
405407
; movq %rsp, %rbp
406408
; block0:
407409
; uninit %xmm0
410+
; pxor %xmm0, %xmm0
408411
; pinsrw $0x0, %edi, %xmm0
409412
; movq %rbp, %rsp
410413
; popq %rbp
@@ -415,6 +418,7 @@ block0(v0: i16):
415418
; pushq %rbp
416419
; movq %rsp, %rbp
417420
; block1: ; offset 0x4
421+
; pxor %xmm0, %xmm0
418422
; pinsrw $0, %edi, %xmm0
419423
; movq %rbp, %rsp
420424
; popq %rbp

cranelift/filetests/filetests/isa/x64/f16const.clif

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ block0():
3939
; block0:
4040
; movl $15360, %esi
4141
; uninit %xmm0
42+
; pxor %xmm0, %xmm0
4243
; pinsrw $0x0, %esi, %xmm0
4344
; movq %rbp, %rsp
4445
; popq %rbp
@@ -50,6 +51,7 @@ block0():
5051
; movq %rsp, %rbp
5152
; block1: ; offset 0x4
5253
; movl $0x3c00, %esi
54+
; pxor %xmm0, %xmm0
5355
; pinsrw $0, %esi, %xmm0
5456
; movq %rbp, %rsp
5557
; popq %rbp
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
;; This test has an associated runtest; it explains that runtest by showing the
2+
;; compiled output and disassembly of the same test case.
3+
4+
test compile precise-output
5+
set opt_level=none
6+
set preserve_frame_pointers=true
7+
set enable_multi_ret_implicit_sret=true
8+
target x86_64
9+
10+
function %munge_xmm0() -> i16x8 fast {
11+
block0:
12+
v0 = vconst.i16x8 [0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff]
13+
return v0
14+
}
15+
16+
; VCode:
17+
; pushq %rbp
18+
; movq %rsp, %rbp
19+
; block0:
20+
; uninit %xmm0
21+
; pcmpeqd %xmm0, %xmm0
22+
; movq %rbp, %rsp
23+
; popq %rbp
24+
; ret
25+
;
26+
; Disassembled:
27+
; block0: ; offset 0x0
28+
; pushq %rbp
29+
; movq %rsp, %rbp
30+
; block1: ; offset 0x4
31+
; pcmpeqd %xmm0, %xmm0
32+
; movq %rbp, %rsp
33+
; popq %rbp
34+
; retq
35+
36+
function %scalar_to_vector() -> i16x8 fast {
37+
fn0 = %munge_xmm0() -> i16x8 fast
38+
block0:
39+
v0 = call fn0()
40+
v1 = iconst.i16 42
41+
v2 = scalar_to_vector.i16x8 v1
42+
return v2
43+
}
44+
45+
; VCode:
46+
; pushq %rbp
47+
; movq %rsp, %rbp
48+
; block0:
49+
; load_ext_name %munge_xmm0+0, %r8
50+
; call *%r8
51+
; movl $42, %r8d
52+
; uninit %xmm0
53+
; pxor %xmm0, %xmm0
54+
; pinsrw $0x0, %r8d, %xmm0
55+
; movq %rbp, %rsp
56+
; popq %rbp
57+
; ret
58+
;
59+
; Disassembled:
60+
; block0: ; offset 0x0
61+
; pushq %rbp
62+
; movq %rsp, %rbp
63+
; block1: ; offset 0x4
64+
; movabsq $0, %r8 ; reloc_external Abs8 %munge_xmm0 0
65+
; callq *%r8
66+
; movl $0x2a, %r8d
67+
; pxor %xmm0, %xmm0
68+
; pinsrw $0, %r8d, %xmm0
69+
; movq %rbp, %rsp
70+
; popq %rbp
71+
; retq
72+
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
;; When CLIF's `scalar_to_vector` operates on an `i16x8` vector, we need to be
2+
;; sure to zero out the upper bits before inserting the `i16` into the lowest
3+
;; lane. This test uses some ABI knowledge (both functions return their result
4+
;; in `xmm0`) to fill in all of the bits (`%munge_xmm0`) and see if they "show
5+
;; through" in the actual test, `%scalar_to_vector`. See
6+
;; https://github.com/bytecodealliance/wasmtime/issues/10906.
7+
8+
test interpret
9+
test run
10+
set opt_level=none
11+
set preserve_frame_pointers=true
12+
set enable_multi_ret_implicit_sret=true
13+
target x86_64
14+
15+
function %munge_xmm0() -> i16x8 fast {
16+
block0:
17+
v0 = vconst.i16x8 [0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff]
18+
return v0
19+
}
20+
21+
function %scalar_to_vector() -> i16x8 fast {
22+
fn0 = %munge_xmm0() -> i16x8 fast
23+
block0:
24+
v0 = call fn0()
25+
v1 = iconst.i16 42
26+
v2 = scalar_to_vector.i16x8 v1
27+
return v2
28+
}
29+
30+
; run: %scalar_to_vector() == [42 0 0 0 0 0 0 0]
31+

0 commit comments

Comments
 (0)