This repository was archived by the owner on Sep 8, 2025. It is now read-only.
File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 50585058
50595059(decl bitcast_gpr_to_xmm (u8 Gpr) Xmm)
50605060(rule (bitcast_gpr_to_xmm 16 src)
5061- (x64_pinsrw (xmm_uninit_value ) src 0))
5061+ (x64_pinsrw (xmm_zero $I16X8 ) src 0))
50625062(rule (bitcast_gpr_to_xmm 32 src)
50635063 (x64_movd_to_xmm src))
50645064(rule (bitcast_gpr_to_xmm 64 src)
Original file line number Diff line number Diff line change @@ -38,6 +38,7 @@ block0(v0: i16):
3838; movq %rsp, %rbp
3939; block0:
4040; uninit %xmm0
41+ ; pxor %xmm0, %xmm0
4142; pinsrw $0x0, %edi, %xmm0
4243; movq %rbp, %rsp
4344; popq %rbp
@@ -48,6 +49,7 @@ block0(v0: i16):
4849; pushq %rbp
4950; movq %rsp, %rbp
5051; block1: ; offset 0x4
52+ ; pxor %xmm0, %xmm0
5153; pinsrw $0, %edi, %xmm0
5254; movq %rbp, %rsp
5355; popq %rbp
@@ -405,6 +407,7 @@ block0(v0: i16):
405407; movq %rsp, %rbp
406408; block0:
407409; uninit %xmm0
410+ ; pxor %xmm0, %xmm0
408411; pinsrw $0x0, %edi, %xmm0
409412; movq %rbp, %rsp
410413; popq %rbp
@@ -415,6 +418,7 @@ block0(v0: i16):
415418; pushq %rbp
416419; movq %rsp, %rbp
417420; block1: ; offset 0x4
421+ ; pxor %xmm0, %xmm0
418422; pinsrw $0, %edi, %xmm0
419423; movq %rbp, %rsp
420424; popq %rbp
Original file line number Diff line number Diff line change @@ -39,6 +39,7 @@ block0():
3939; block0:
4040; movl $15360, %esi
4141; uninit %xmm0
42+ ; pxor %xmm0, %xmm0
4243; pinsrw $0x0, %esi, %xmm0
4344; movq %rbp, %rsp
4445; popq %rbp
@@ -50,6 +51,7 @@ block0():
5051; movq %rsp, %rbp
5152; block1: ; offset 0x4
5253; movl $0x3c00, %esi
54+ ; pxor %xmm0, %xmm0
5355; pinsrw $0, %esi, %xmm0
5456; movq %rbp, %rsp
5557; popq %rbp
Original file line number Diff line number Diff line change 1+ ;; This test has an associated runtest; this file explains that runtest by
2+ ;; showing the disassembly of the same test case.
3+
4+ test compile precise-output
5+ set opt_level=none
6+ set preserve_frame_pointers=true
7+ set enable_multi_ret_implicit_sret=true
8+ target x86_64
9+
10+ function %munge_xmm0() -> i16x8 fast {
11+ block0:
12+ v0 = vconst.i16x8 [0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff]
13+ return v0
14+ }
15+
16+ ; VCode:
17+ ; pushq %rbp
18+ ; movq %rsp, %rbp
19+ ; block0:
20+ ; uninit %xmm0
21+ ; pcmpeqd %xmm0, %xmm0
22+ ; movq %rbp, %rsp
23+ ; popq %rbp
24+ ; ret
25+ ;
26+ ; Disassembled:
27+ ; block0: ; offset 0x0
28+ ; pushq %rbp
29+ ; movq %rsp, %rbp
30+ ; block1: ; offset 0x4
31+ ; pcmpeqd %xmm0, %xmm0
32+ ; movq %rbp, %rsp
33+ ; popq %rbp
34+ ; retq
35+
36+ function %scalar_to_vector() -> i16x8 fast {
37+ fn0 = %munge_xmm0() -> i16x8 fast
38+ block0:
39+ v0 = call fn0()
40+ v1 = iconst.i16 42
41+ v2 = scalar_to_vector.i16x8 v1
42+ return v2
43+ }
44+
45+ ; VCode:
46+ ; pushq %rbp
47+ ; movq %rsp, %rbp
48+ ; block0:
49+ ; load_ext_name %munge_xmm0+0, %r8
50+ ; call *%r8
51+ ; movl $42, %r8d
52+ ; uninit %xmm0
53+ ; pxor %xmm0, %xmm0
54+ ; pinsrw $0x0, %r8d, %xmm0
55+ ; movq %rbp, %rsp
56+ ; popq %rbp
57+ ; ret
58+ ;
59+ ; Disassembled:
60+ ; block0: ; offset 0x0
61+ ; pushq %rbp
62+ ; movq %rsp, %rbp
63+ ; block1: ; offset 0x4
64+ ; movabsq $0, %r8 ; reloc_external Abs8 %munge_xmm0 0
65+ ; callq *%r8
66+ ; movl $0x2a, %r8d
67+ ; pxor %xmm0, %xmm0
68+ ; pinsrw $0, %r8d, %xmm0
69+ ; movq %rbp, %rsp
70+ ; popq %rbp
71+ ; retq
72+
Original file line number Diff line number Diff line change 1+ ;; When CLIF's `scalar_to_vector` operates on an `i16x8` vector, we need to be
2+ ;; sure to zero out the upper bits before inserting the `i16` into the lowest
3+ ;; lane. This test uses some ABI knowledge (both functions return their result
4+ ;; in `xmm0`) to fill in all of the bits (`%munge_xmm0`) and see if they "show
5+ ;; through" in the actual test, `%scalar_to_vector`. See
6+ ;; https://github.com/bytecodealliance/wasmtime/issues/10906.
7+
8+ test interpret
9+ test run
10+ set opt_level=none
11+ set preserve_frame_pointers=true
12+ set enable_multi_ret_implicit_sret=true
13+ target x86_64
14+
15+ function %munge_xmm0() -> i16x8 fast {
16+ block0:
17+ v0 = vconst.i16x8 [0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff]
18+ return v0
19+ }
20+
21+ function %scalar_to_vector() -> i16x8 fast {
22+ fn0 = %munge_xmm0() -> i16x8 fast
23+ block0:
24+ v0 = call fn0()
25+ v1 = iconst.i16 42
26+ v2 = scalar_to_vector.i16x8 v1
27+ return v2
28+ }
29+
30+ ; run: %scalar_to_vector() == [42 0 0 0 0 0 0 0]
31+
You can’t perform that action at this time.
0 commit comments