Skip to content

Commit f48a620

Browse files
authored
Add shuf and pshuf variants to the new assembler (#11135)
1 parent 421136d commit f48a620

11 files changed

Lines changed: 40 additions & 44 deletions

File tree

cranelift/assembler-x64/meta/src/instructions/lanes.rs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,15 @@ pub fn list() -> Vec<Inst> {
6262
inst("vblendvpd", fmt("RVMR", [w(xmm1), r(xmm2), r(xmm_m128), r(xmm3)]), vex(L128)._66()._0f3a().w0().op(0x4B).r().is4(), _64b | compat | avx),
6363

6464
// Shuffle lanes in various ways.
65+
inst("shufpd", fmt("A", [rw(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0x66, 0x0F, 0xC6]).ib(), _64b | compat | sse2).alt(avx, "vshufpd_b"),
66+
inst("vshufpd", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128), r(imm8)]), vex(L128)._66()._0f().ib().op(0xC6), _64b | compat | avx),
67+
inst("shufps", fmt("A", [rw(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0x0F, 0xC6]).ib(), _64b | compat | sse).alt(avx, "vshufps_b"),
68+
inst("vshufps", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128), r(imm8)]), vex(L128)._0f().ib().op(0xC6), _64b | compat | avx),
69+
inst("pshufb", fmt("A", [rw(xmm1), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x38, 0x00]), _64b | compat | ssse3).alt(avx, "vpshufb_b"),
6570
inst("pshufd", fmt("A", [w(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0x66, 0x0F, 0x70]).r().ib(), _64b | compat | sse2).alt(avx, "vpshufd_a"),
6671
inst("pshuflw", fmt("A", [w(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0xF2, 0x0F, 0x70]).r().ib(), _64b | compat | sse2).alt(avx, "vpshuflw_a"),
6772
inst("pshufhw", fmt("A", [w(xmm1), r(align(xmm_m128)), r(imm8)]), rex([0xF3, 0x0F, 0x70]).r().ib(), _64b | compat | sse2).alt(avx, "vpshufhw_a"),
73+
inst("vpshufb", fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), vex(L128)._66()._0f38().op(0x00), _64b | compat | avx),
6874
inst("vpshufd", fmt("A", [w(xmm1), r(xmm_m128), r(imm8)]), vex(L128)._66()._0f().op(0x70).r().ib(), _64b | compat | avx),
6975
inst("vpshuflw", fmt("A", [w(xmm1), r(xmm_m128), r(imm8)]), vex(L128)._f2()._0f().op(0x70).r().ib(), _64b | compat | avx),
7076
inst("vpshufhw", fmt("A", [w(xmm1), r(xmm_m128), r(imm8)]), vex(L128)._f3()._0f().op(0x70).r().ib(), _64b | compat | avx),

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 11 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -460,11 +460,7 @@
460460
(rule (operand_size_bits (OperandSize.Size32)) 32)
461461
(rule (operand_size_bits (OperandSize.Size64)) 64)
462462

463-
(type SseOpcode extern
464-
(enum Insertps
465-
Pshufb
466-
Shufps
467-
))
463+
(type SseOpcode extern (enum Insertps))
468464

469465
(type RegMemImm extern
470466
(enum
@@ -2881,11 +2877,15 @@
28812877

28822878
;; Helper for creating `pshufb` instructions.
28832879
(decl x64_pshufb (Xmm XmmMem) Xmm)
2884-
(rule 0 (x64_pshufb src1 src2)
2885-
(xmm_rm_r (SseOpcode.Pshufb) src1 src2))
2886-
(rule 1 (x64_pshufb src1 src2)
2887-
(if-let true (use_avx))
2888-
(xmm_rmir_vex (AvxOpcode.Vpshufb) src1 src2))
2880+
(rule (x64_pshufb src1 src2) (x64_pshufb_a_or_avx src1 src2))
2881+
2882+
;; Helper for creating `shufpd` instructions.
2883+
(decl x64_shufpd (Xmm XmmMem u8) Xmm)
2884+
(rule (x64_shufpd src1 src2 byte) (x64_shufpd_a_or_avx src1 src2 byte))
2885+
2886+
;; Helper for creating `shufps` instructions.
2887+
(decl x64_shufps (Xmm XmmMem u8) Xmm)
2888+
(rule (x64_shufps src1 src2 byte) (x64_shufps_a_or_avx src1 src2 byte))
28892889

28902890
;; Helper for creating `pshuflw` instructions.
28912891
(decl x64_pshuflw (XmmMem u8) Xmm)
@@ -2901,17 +2901,7 @@
29012901
(if-let true (use_avx))
29022902
(x64_vpshufhw_a src imm))
29032903

2904-
;; Helper for creating `shufps` instructions.
2905-
(decl x64_shufps (Xmm XmmMem u8) Xmm)
2906-
(rule 0 (x64_shufps src1 src2 byte)
2907-
(xmm_rm_r_imm (SseOpcode.Shufps)
2908-
src1
2909-
src2
2910-
byte
2911-
(OperandSize.Size32)))
2912-
(rule 1 (x64_shufps src1 src2 byte)
2913-
(if-let true (use_avx))
2914-
(xmm_rmr_imm_vex (AvxOpcode.Vshufps) src1 src2 byte))
2904+
29152905

29162906
;; Helper for creating `vcvtudq2ps` instructions.
29172907
(decl x64_vcvtudq2ps (XmmMem) Xmm)

cranelift/filetests/filetests/isa/aarch64/nan-canonicalization.clif

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ block0(v0: f32x4, v1: f32x4):
1515
; addps %xmm1, %xmm0
1616
; movl $0x7fc00000, %r10d
1717
; movd %r10d, %xmm7
18-
; shufps $0, %xmm7, const(0), %xmm7
18+
; shufps $0x0, (%rip), %xmm7
1919
; movdqa %xmm0, %xmm1
2020
; cmpunordps %xmm0, %xmm1
2121
; movdqa %xmm0, %xmm2

cranelift/filetests/filetests/isa/x64/nan-canonicalization-sse41.clif

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ block0(v0: f32x4, v1: f32x4):
1515
; addps %xmm1, %xmm0
1616
; movl $0x7fc00000, %r10d
1717
; movd %r10d, %xmm7
18-
; shufps $0, %xmm7, const(0), %xmm7
18+
; shufps $0x0, (%rip), %xmm7
1919
; movdqa %xmm0, %xmm1
2020
; cmpunordps %xmm0, %xmm1
2121
; movdqa %xmm0, %xmm2

cranelift/filetests/filetests/isa/x64/nan-canonicalization.clif

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ block0(v0: f32x4, v1: f32x4):
1616
; movdqa %xmm0, %xmm1
1717
; movl $0x7fc00000, %esi
1818
; movd %esi, %xmm5
19-
; shufps $0, %xmm5, const(0), %xmm5
19+
; shufps $0x0, (%rip), %xmm5
2020
; cmpunordps %xmm1, %xmm0
2121
; andps %xmm0, %xmm5
2222
; andnps %xmm1, %xmm0

cranelift/filetests/filetests/isa/x64/shuffle.clif

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ block0(v0: i32x4, v1: i32x4):
204204
; pushq %rbp
205205
; movq %rsp, %rbp
206206
; block0:
207-
; shufps $94, %xmm0, %xmm1, %xmm0
207+
; shufps $0x5e, %xmm1, %xmm0
208208
; movq %rbp, %rsp
209209
; popq %rbp
210210
; retq
@@ -344,7 +344,7 @@ block0(v0: i32x4, v1: i32x4):
344344
; pushq %rbp
345345
; movq %rsp, %rbp
346346
; block0:
347-
; shufps $251, %xmm0, %xmm1, %xmm0
347+
; shufps $0xfb, %xmm1, %xmm0
348348
; movq %rbp, %rsp
349349
; popq %rbp
350350
; retq
@@ -374,7 +374,7 @@ block0(v0: i32x4, v1: i32x4):
374374
; block0:
375375
; movdqa %xmm0, %xmm4
376376
; movdqa %xmm1, %xmm0
377-
; shufps $6, %xmm0, %xmm4, %xmm0
377+
; shufps $0x6, %xmm4, %xmm0
378378
; movq %rbp, %rsp
379379
; popq %rbp
380380
; retq
@@ -627,7 +627,7 @@ block0(v0: i8x16, v1: i8x16):
627627
; block0:
628628
; uninit %xmm4
629629
; pxor %xmm4, %xmm4
630-
; pshufb %xmm0, %xmm4, %xmm0
630+
; pshufb %xmm4, %xmm0
631631
; movq %rbp, %rsp
632632
; popq %rbp
633633
; retq

cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1312,7 +1312,7 @@ block0(v0: i8):
13121312
; vmovd %edi, %xmm2
13131313
; uninit %xmm4
13141314
; vpxor %xmm4, %xmm4, %xmm6
1315-
; vpshufb %xmm2, %xmm6, %xmm0
1315+
; vpshufb %xmm6, %xmm2, %xmm0
13161316
; movq %rbp, %rsp
13171317
; popq %rbp
13181318
; retq
@@ -1347,7 +1347,7 @@ block0(v0: f64x2):
13471347
; vminpd (%rip), %xmm6, %xmm0
13481348
; vroundpd $0x3, %xmm0, %xmm2
13491349
; vaddpd (%rip), %xmm2, %xmm5
1350-
; vshufps $136, %xmm5, %xmm4, %xmm0
1350+
; vshufps $0x88, %xmm4, %xmm5, %xmm0
13511351
; movq %rbp, %rsp
13521352
; popq %rbp
13531353
; retq

cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,8 @@ block0:
1818
; uninit %xmm0
1919
; pxor %xmm0, %xmm0
2020
; movdqu (%rip), %xmm3
21-
; pshufb %xmm0, const(0), %xmm0
22-
; pshufb %xmm3, const(1), %xmm3
21+
; pshufb (%rip), %xmm0
22+
; pshufb (%rip), %xmm3
2323
; por %xmm3, %xmm0
2424
; movq %rbp, %rsp
2525
; popq %rbp
@@ -71,7 +71,7 @@ block0:
7171
; movq %rsp, %rbp
7272
; block0:
7373
; movdqu (%rip), %xmm0
74-
; pshufb %xmm0, const(0), %xmm0
74+
; pshufb (%rip), %xmm0
7575
; movq %rbp, %rsp
7676
; popq %rbp
7777
; retq
@@ -121,7 +121,7 @@ block0:
121121
; movdqu (%rip), %xmm0
122122
; movdqu (%rip), %xmm1
123123
; paddusb (%rip), %xmm1
124-
; pshufb %xmm0, %xmm1, %xmm0
124+
; pshufb %xmm1, %xmm0
125125
; movq %rbp, %rsp
126126
; popq %rbp
127127
; retq
@@ -160,7 +160,7 @@ block0(v0: i8):
160160
; movd %edi, %xmm0
161161
; uninit %xmm5
162162
; pxor %xmm5, %xmm5
163-
; pshufb %xmm0, %xmm5, %xmm0
163+
; pshufb %xmm5, %xmm0
164164
; movq %rbp, %rsp
165165
; popq %rbp
166166
; retq

cranelift/filetests/filetests/isa/x64/simd-splat-avx.clif

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ block0(v0: i8):
1414
; vmovd %edi, %xmm2
1515
; uninit %xmm4
1616
; vpxor %xmm4, %xmm4, %xmm6
17-
; vpshufb %xmm2, %xmm6, %xmm0
17+
; vpshufb %xmm6, %xmm2, %xmm0
1818
; movq %rbp, %rsp
1919
; popq %rbp
2020
; retq
@@ -124,7 +124,7 @@ block0(v0: f32):
124124
; pushq %rbp
125125
; movq %rsp, %rbp
126126
; block0:
127-
; vshufps $0, %xmm0, %xmm0, %xmm0
127+
; vshufps $0x0, %xmm0, %xmm0, %xmm0
128128
; movq %rbp, %rsp
129129
; popq %rbp
130130
; retq
@@ -179,7 +179,7 @@ block0(v0: i64):
179179
; vpinsrb $0x0, (%rdi), %xmm2, %xmm4
180180
; uninit %xmm6
181181
; vpxor %xmm6, %xmm6, %xmm0
182-
; vpshufb %xmm4, %xmm0, %xmm0
182+
; vpshufb %xmm0, %xmm4, %xmm0
183183
; movq %rbp, %rsp
184184
; popq %rbp
185185
; retq

cranelift/filetests/filetests/isa/x64/simd-splat.clif

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ block0(v0: i8):
1414
; movd %edi, %xmm0
1515
; uninit %xmm5
1616
; pxor %xmm5, %xmm5
17-
; pshufb %xmm0, %xmm5, %xmm0
17+
; pshufb %xmm5, %xmm0
1818
; movq %rbp, %rsp
1919
; popq %rbp
2020
; retq
@@ -124,7 +124,7 @@ block0(v0: f32):
124124
; pushq %rbp
125125
; movq %rsp, %rbp
126126
; block0:
127-
; shufps $0, %xmm0, %xmm0, %xmm0
127+
; shufps $0x0, %xmm0, %xmm0
128128
; movq %rbp, %rsp
129129
; popq %rbp
130130
; retq
@@ -179,7 +179,7 @@ block0(v0: i64):
179179
; pinsrb $0x0, (%rdi), %xmm0
180180
; uninit %xmm7
181181
; pxor %xmm7, %xmm7
182-
; pshufb %xmm0, %xmm7, %xmm0
182+
; pshufb %xmm7, %xmm0
183183
; movq %rbp, %rsp
184184
; popq %rbp
185185
; retq
@@ -239,7 +239,7 @@ block0(v0: i64):
239239
; movq %rsp, %rbp
240240
; block0:
241241
; movss (%rdi), %xmm0
242-
; shufps $0, %xmm0, %xmm0, %xmm0
242+
; shufps $0x0, %xmm0, %xmm0
243243
; movq %rbp, %rsp
244244
; popq %rbp
245245
; retq
@@ -293,7 +293,7 @@ block0(v0: i64):
293293
; movq %rsp, %rbp
294294
; block0:
295295
; movss (%rdi), %xmm0
296-
; shufps $0, %xmm0, %xmm0, %xmm0
296+
; shufps $0x0, %xmm0, %xmm0
297297
; movq %rbp, %rsp
298298
; popq %rbp
299299
; retq

0 commit comments

Comments
 (0)