Skip to content
This repository was archived by the owner on Sep 8, 2025. It is now read-only.

Commit 0de0089

Browse files
authored
x64: convert vpextr* instructions (#11094)
This converts all AVX versions of the XMM lane-extraction instructions (`vpextr{b,w,d,q}`) to use the new assembler, which in turn allows the `Inst::XmmMovRMImmVex` and `Inst::XmmToGprImmVex` variants to be removed.
1 parent 903ec89 commit 0de0089

13 files changed

Lines changed: 94 additions & 217 deletions

File tree

cranelift/assembler-x64/meta/src/generate/format.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ impl dsl::Format {
282282
}
283283
}
284284
},
285-
[Reg(reg), Reg(rm)] => {
285+
[Reg(reg), Reg(rm)] | [Reg(reg), Reg(rm), Imm(_)] => {
286286
assert!(!vex.is4);
287287
fmtln!(f, "let reg = self.{reg}.enc();");
288288
fmtln!(f, "let rm = self.{rm}.encode_bx_regs();");
@@ -292,7 +292,7 @@ impl dsl::Format {
292292
rm: *rm,
293293
}
294294
}
295-
[Reg(reg), Mem(rm)] | [Mem(rm), Reg(reg)] => {
295+
[Reg(reg), Mem(rm)] | [Mem(rm), Reg(reg)] | [RegMem(rm), Reg(reg), Imm(_)] => {
296296
assert!(!vex.is4);
297297
fmtln!(f, "let reg = self.{reg}.enc();");
298298
fmtln!(f, "let rm = self.{rm}.encode_bx_regs();");

cranelift/assembler-x64/meta/src/instructions.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,16 @@ fn check_sse_matches_avx(sse_inst: &Inst, avx_inst: &Inst) {
146146
],
147147
) => {}
148148
(
149-
[(Write, Reg(_)), (Read, Reg(_) | RegMem(_)), (Read, Imm(_))],
150-
[(Write, Reg(_)), (Read, Reg(_) | RegMem(_)), (Read, Imm(_))],
149+
[
150+
(Write, Reg(_) | RegMem(_)),
151+
(Read, Reg(_) | RegMem(_)),
152+
(Read, Imm(_)),
153+
],
154+
[
155+
(Write, Reg(_) | RegMem(_)),
156+
(Read, Reg(_) | RegMem(_)),
157+
(Read, Imm(_)),
158+
],
151159
) => {}
152160
(
153161
[(ReadWrite, Reg(_)), (Read, RegMem(_)), (Read, Imm(_))],

cranelift/assembler-x64/meta/src/instructions/lanes.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,22 @@ pub fn list() -> Vec<Inst> {
66
// Note that `p{extr,ins}r{w,b}` below operate on 32-bit registers but a
77
// smaller-width memory location. This means that disassembly in Capstone
88
// doesn't match `rm8`, for example. For now pretend both of these are
9-
// `rm32` to get diassembly matching Capstone.
9+
// `rm32` to get disassembly matching Capstone.
1010
let r32m8 = rm32;
1111
let r32m16 = rm32;
1212

1313
vec![
1414
// Extract from a single XMM lane.
15-
inst("pextrb", fmt("A", [w(r32m8), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x14]).r().ib(), _64b | compat | sse41),
16-
inst("pextrw", fmt("A", [w(r32), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0xC5]).r().ib(), _64b | compat | sse2),
17-
inst("pextrw", fmt("B", [w(r32m16), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x15]).r().ib(), _64b | compat | sse41),
18-
inst("pextrd", fmt("A", [w(rm32), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x16]).r().ib(), _64b | compat | sse41),
19-
inst("pextrq", fmt("A", [w(rm64), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x16]).w().r().ib(), _64b | sse41),
15+
inst("pextrb", fmt("A", [w(r32m8), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x14]).r().ib(), _64b | compat | sse41).alt(avx, "vpextrb_a"),
16+
inst("pextrw", fmt("A", [w(r32), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0xC5]).r().ib(), _64b | compat | sse2).alt(avx, "vpextrw_a"),
17+
inst("pextrw", fmt("B", [w(r32m16), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x15]).r().ib(), _64b | compat | sse41).alt(avx, "vpextrw_b"),
18+
inst("pextrd", fmt("A", [w(rm32), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x16]).r().ib(), _64b | compat | sse41).alt(avx, "vpextrd_a"),
19+
inst("pextrq", fmt("A", [w(rm64), r(xmm2), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x16]).w().r().ib(), _64b | sse41).alt(avx, "vpextrq_a"),
20+
inst("vpextrb", fmt("A", [w(r32m8), r(xmm2), r(imm8)]), vex(L128)._66()._0f3a().w0().op(0x14).r().ib(), _64b | compat | avx),
21+
inst("vpextrw", fmt("A", [w(r32), r(xmm2), r(imm8)]), vex(L128)._66()._0f().w0().op(0xC5).r().ib(), _64b | compat | avx),
22+
inst("vpextrw", fmt("B", [w(r32m16), r(xmm2), r(imm8)]), vex(L128)._66()._0f3a().w0().op(0x15).r().ib(), _64b | compat | avx),
23+
inst("vpextrd", fmt("A", [w(rm32), r(xmm2), r(imm8)]), vex(L128)._66()._0f3a().w0().op(0x16).r().ib(), _64b | compat | avx),
24+
inst("vpextrq", fmt("A", [w(rm64), r(xmm2), r(imm8)]), vex(L128)._66()._0f3a().w1().op(0x16).r().ib(), _64b | compat | avx),
2025

2126
// Insert into a single XMM lane.
2227
inst("pinsrb", fmt("A", [rw(xmm1), r(r32m8), r(imm8)]), rex([0x66, 0x0F, 0x3A, 0x20]).r().ib(), _64b | compat | sse41),

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 9 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -112,16 +112,6 @@
112112
(XmmMovRMVex (op AvxOpcode)
113113
(src Xmm)
114114
(dst SyntheticAmode))
115-
(XmmMovRMImmVex (op AvxOpcode)
116-
(src Xmm)
117-
(dst SyntheticAmode)
118-
(imm u8))
119-
120-
;; XMM (scalar) unary op (from xmm to integer reg): vpextr{w,b,d,q}
121-
(XmmToGprImmVex (op AvxOpcode)
122-
(src Xmm)
123-
(dst WritableGpr)
124-
(imm u8))
125115

126116
;; Float comparisons/tests: cmp (b w l q) (reg addr imm) reg.
127117
(XmmCmpRmRVex (op AvxOpcode)
@@ -929,10 +919,6 @@
929919
Vmovups
930920
Vmovupd
931921
Vmovdqu
932-
Vpextrb
933-
Vpextrw
934-
Vpextrd
935-
Vpextrq
936922
Vpblendw
937923
Vsqrtss
938924
Vsqrtsd
@@ -1471,13 +1457,6 @@
14711457
(_ Unit (emit (MInst.XmmUnaryRmRImmEvex op src dst imm))))
14721458
dst))
14731459

1474-
;; Helper for creating `MInst.XmmToGprImmVex` instructions.
1475-
(decl xmm_to_gpr_imm_vex (AvxOpcode Xmm u8) Gpr)
1476-
(rule (xmm_to_gpr_imm_vex op src imm)
1477-
(let ((dst WritableGpr (temp_writable_gpr))
1478-
(_ Unit (emit (MInst.XmmToGprImmVex op src dst imm))))
1479-
dst))
1480-
14811460
;; Helper for creating `xmm_min_max_seq` pseudo-instructions.
14821461
(decl xmm_min_max_seq (Type bool Xmm Xmm) Xmm)
14831462
(rule (xmm_min_max_seq ty is_min lhs rhs)
@@ -1903,10 +1882,6 @@
19031882
(rule (xmm_movrm_vex op addr data)
19041883
(SideEffectNoResult.Inst (MInst.XmmMovRMVex op data addr)))
19051884

1906-
(decl xmm_movrm_imm_vex (AvxOpcode SyntheticAmode Xmm u8) SideEffectNoResult)
1907-
(rule (xmm_movrm_imm_vex op addr data imm)
1908-
(SideEffectNoResult.Inst (MInst.XmmMovRMImmVex op data addr imm)))
1909-
19101885
;; Load a constant into an XMM register.
19111886
(decl x64_xmm_load_const (Type VCodeConstant) Xmm)
19121887
(rule (x64_xmm_load_const ty const)
@@ -3573,57 +3548,30 @@
35733548
(rule (x64_vpsraq_imm src imm)
35743549
(xmm_unary_rm_r_imm_evex (Avx512Opcode.VpsraqImm) src imm))
35753550

3576-
;; Helper for creating `pextrb` instructions.
3551+
;; Helper for creating `pextr*` instructions.
35773552
(decl x64_pextrb (Xmm u8) Gpr)
3578-
(rule (x64_pextrb src lane) (x64_pextrb_a src lane))
3579-
(rule 1 (x64_pextrb src lane)
3580-
(if-let true (use_avx))
3581-
(xmm_to_gpr_imm_vex (AvxOpcode.Vpextrb) src lane))
3553+
(rule (x64_pextrb src lane) (x64_pextrb_a_or_avx src lane))
35823554

35833555
(decl x64_pextrb_store (Amode Xmm u8) SideEffectNoResult)
3584-
(rule (x64_pextrb_store addr src lane) (x64_pextrb_a_mem addr src lane))
3585-
(rule 1 (x64_pextrb_store addr src lane)
3586-
(if-let true (use_avx))
3587-
(xmm_movrm_imm_vex (AvxOpcode.Vpextrb) addr src lane))
3556+
(rule (x64_pextrb_store addr src lane) (x64_pextrb_a_mem_or_avx addr src lane))
35883557

3589-
;; Helper for creating `pextrw` instructions.
35903558
(decl x64_pextrw (Xmm u8) Gpr)
3591-
(rule (x64_pextrw src lane) (x64_pextrw_a src lane))
3592-
(rule 1 (x64_pextrw src lane)
3593-
(if-let true (use_avx))
3594-
(xmm_to_gpr_imm_vex (AvxOpcode.Vpextrw) src lane))
3559+
(rule (x64_pextrw src lane) (x64_pextrw_a_or_avx src lane))
35953560

35963561
(decl x64_pextrw_store (Amode Xmm u8) SideEffectNoResult)
3597-
(rule (x64_pextrw_store addr src lane) (x64_pextrw_b_mem addr src lane))
3598-
(rule 1 (x64_pextrw_store addr src lane)
3599-
(if-let true (use_avx))
3600-
(xmm_movrm_imm_vex (AvxOpcode.Vpextrw) addr src lane))
3562+
(rule (x64_pextrw_store addr src lane) (x64_pextrw_b_mem_or_avx addr src lane))
36013563

3602-
;; Helper for creating `pextrd` instructions.
36033564
(decl x64_pextrd (Xmm u8) Gpr)
3604-
(rule (x64_pextrd src lane) (x64_pextrd_a src lane))
3605-
(rule 1 (x64_pextrd src lane)
3606-
(if-let true (use_avx))
3607-
(xmm_to_gpr_imm_vex (AvxOpcode.Vpextrd) src lane))
3565+
(rule (x64_pextrd src lane) (x64_pextrd_a_or_avx src lane))
36083566

36093567
(decl x64_pextrd_store (Amode Xmm u8) SideEffectNoResult)
3610-
(rule (x64_pextrd_store addr src lane) (x64_pextrd_a_mem addr src lane))
3611-
(rule 1 (x64_pextrd_store addr src lane)
3612-
(if-let true (use_avx))
3613-
(xmm_movrm_imm_vex (AvxOpcode.Vpextrd) addr src lane))
3568+
(rule (x64_pextrd_store addr src lane) (x64_pextrd_a_mem_or_avx addr src lane))
36143569

3615-
;; Helper for creating `pextrq` instructions.
36163570
(decl x64_pextrq (Xmm u8) Gpr)
3617-
(rule (x64_pextrq src lane) (x64_pextrq_a src lane))
3618-
(rule 1 (x64_pextrq src lane)
3619-
(if-let true (use_avx))
3620-
(xmm_to_gpr_imm_vex (AvxOpcode.Vpextrq) src lane))
3571+
(rule (x64_pextrq src lane) (x64_pextrq_a_or_avx src lane))
36213572

36223573
(decl x64_pextrq_store (Amode Xmm u8) SideEffectNoResult)
3623-
(rule (x64_pextrq_store addr src lane) (x64_pextrq_a_mem addr src lane))
3624-
(rule 1 (x64_pextrq_store addr src lane)
3625-
(if-let true (use_avx))
3626-
(xmm_movrm_imm_vex (AvxOpcode.Vpextrq) addr src lane))
3574+
(rule (x64_pextrq_store addr src lane) (x64_pextrq_a_mem_or_avx addr src lane))
36273575

36283576
;; Helper for creating `pmovmskb` instructions.
36293577
(decl x64_pmovmskb (Xmm) Gpr)

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,10 +1078,6 @@ impl AvxOpcode {
10781078
| AvxOpcode::Vmovups
10791079
| AvxOpcode::Vmovupd
10801080
| AvxOpcode::Vmovdqu
1081-
| AvxOpcode::Vpextrb
1082-
| AvxOpcode::Vpextrw
1083-
| AvxOpcode::Vpextrd
1084-
| AvxOpcode::Vpextrq
10851081
| AvxOpcode::Vpblendw
10861082
| AvxOpcode::Vsqrtss
10871083
| AvxOpcode::Vsqrtsd

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,52 +1397,6 @@ pub(crate) fn emit(
13971397
.encode(sink);
13981398
}
13991399

1400-
Inst::XmmMovRMImmVex { op, src, dst, imm } => {
1401-
let src = src.to_reg();
1402-
let dst = dst.clone().finalize(state.frame_layout(), sink);
1403-
1404-
let (w, prefix, map, opcode) = match op {
1405-
AvxOpcode::Vpextrb => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x14),
1406-
AvxOpcode::Vpextrw => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x15),
1407-
AvxOpcode::Vpextrd => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x16),
1408-
AvxOpcode::Vpextrq => (true, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x16),
1409-
_ => unimplemented!("Opcode {:?} not implemented", op),
1410-
};
1411-
VexInstruction::new()
1412-
.length(VexVectorLength::V128)
1413-
.w(w)
1414-
.prefix(prefix)
1415-
.map(map)
1416-
.opcode(opcode)
1417-
.rm(dst)
1418-
.reg(src.to_real_reg().unwrap().hw_enc())
1419-
.imm(*imm)
1420-
.encode(sink);
1421-
}
1422-
1423-
Inst::XmmToGprImmVex { op, src, dst, imm } => {
1424-
let src = src.to_reg();
1425-
let dst = dst.to_reg().to_reg();
1426-
1427-
let (w, prefix, map, opcode) = match op {
1428-
AvxOpcode::Vpextrb => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x14),
1429-
AvxOpcode::Vpextrw => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x15),
1430-
AvxOpcode::Vpextrd => (false, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x16),
1431-
AvxOpcode::Vpextrq => (true, LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x16),
1432-
_ => unimplemented!("Opcode {:?} not implemented", op),
1433-
};
1434-
VexInstruction::new()
1435-
.length(VexVectorLength::V128)
1436-
.w(w)
1437-
.prefix(prefix)
1438-
.map(map)
1439-
.opcode(opcode)
1440-
.rm(dst.to_real_reg().unwrap().hw_enc())
1441-
.reg(src.to_real_reg().unwrap().hw_enc())
1442-
.imm(*imm)
1443-
.encode(sink);
1444-
}
1445-
14461400
Inst::XmmCmpRmRVex { op, src1, src2 } => {
14471401
let src1 = src1.to_reg();
14481402
let src2 = match src2.clone().to_reg_mem().clone() {

cranelift/codegen/src/isa/x64/inst/mod.rs

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,6 @@ impl Inst {
130130
| Inst::XmmRmRVex3 { op, .. }
131131
| Inst::XmmRmRImmVex { op, .. }
132132
| Inst::XmmMovRMVex { op, .. }
133-
| Inst::XmmMovRMImmVex { op, .. }
134-
| Inst::XmmToGprImmVex { op, .. }
135133
| Inst::XmmCmpRmRVex { op, .. } => op.available_from(),
136134

137135
Inst::External { inst } => {
@@ -584,15 +582,6 @@ impl PrettyPrint for Inst {
584582
format!("{op} {src}, {dst}")
585583
}
586584

587-
Inst::XmmMovRMImmVex {
588-
op, src, dst, imm, ..
589-
} => {
590-
let src = pretty_print_reg(src.to_reg(), 8);
591-
let dst = dst.pretty_print(8);
592-
let op = ljustify(op.to_string());
593-
format!("{op} ${imm}, {src}, {dst}")
594-
}
595-
596585
Inst::XmmRmR {
597586
op,
598587
src1,
@@ -753,13 +742,6 @@ impl PrettyPrint for Inst {
753742
format!("{op} {dst}")
754743
}
755744

756-
Inst::XmmToGprImmVex { op, src, dst, imm } => {
757-
let src = pretty_print_reg(src.to_reg(), 8);
758-
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8);
759-
let op = ljustify(op.to_string());
760-
format!("{op} ${imm}, {src}, {dst}")
761-
}
762-
763745
Inst::XmmCmpRmR { op, src1, src2 } => {
764746
let src1 = pretty_print_reg(src1.to_reg(), 8);
765747
let src2 = src2.pretty_print(8);
@@ -1316,7 +1298,7 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
13161298
collector.reg_use(lhs);
13171299
collector.reg_reuse_def(dst, 0); // Reuse RHS.
13181300
}
1319-
Inst::XmmMovRMVex { src, dst, .. } | Inst::XmmMovRMImmVex { src, dst, .. } => {
1301+
Inst::XmmMovRMVex { src, dst, .. } => {
13201302
collector.reg_use(src);
13211303
dst.get_operands(collector);
13221304
}
@@ -1338,10 +1320,6 @@ fn x64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
13381320
collector.reg_use(src);
13391321
collector.reg_fixed_nonallocatable(*dst);
13401322
}
1341-
Inst::XmmToGprImmVex { src, dst, .. } => {
1342-
collector.reg_use(src);
1343-
collector.reg_def(dst);
1344-
}
13451323
Inst::CvtUint64ToFloatSeq {
13461324
src,
13471325
dst,

cranelift/codegen/src/isa/x64/pcc.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -195,11 +195,7 @@ pub(crate) fn check(
195195
ensure_no_fact(vcode, dst.to_writable_reg().to_reg())
196196
}
197197

198-
Inst::XmmMovRMVex { ref dst, .. } | Inst::XmmMovRMImmVex { ref dst, .. } => {
199-
check_store(ctx, None, dst, vcode, I8X16)
200-
}
201-
202-
Inst::XmmToGprImmVex { dst, .. } => ensure_no_fact(vcode, dst.to_writable_reg().to_reg()),
198+
Inst::XmmMovRMVex { ref dst, .. } => check_store(ctx, None, dst, vcode, I8X16),
203199

204200
Inst::CvtUint64ToFloatSeq {
205201
dst,

cranelift/filetests/filetests/isa/x64/extractlane-avx.clif

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ block0(v0: i8x16):
1111
; pushq %rbp
1212
; movq %rsp, %rbp
1313
; block0:
14-
; vpextrb $1, %xmm0, %rax
14+
; vpextrb $0x1, %xmm0, %eax
1515
; movq %rbp, %rsp
1616
; popq %rbp
1717
; retq
@@ -36,7 +36,7 @@ block0(v0: i16x8):
3636
; pushq %rbp
3737
; movq %rsp, %rbp
3838
; block0:
39-
; vpextrw $1, %xmm0, %rax
39+
; vpextrw $0x1, %xmm0, %eax
4040
; movq %rbp, %rsp
4141
; popq %rbp
4242
; retq
@@ -61,7 +61,7 @@ block0(v0: i32x4):
6161
; pushq %rbp
6262
; movq %rsp, %rbp
6363
; block0:
64-
; vpextrd $1, %xmm0, %rax
64+
; vpextrd $0x1, %xmm0, %eax
6565
; movq %rbp, %rsp
6666
; popq %rbp
6767
; retq
@@ -86,7 +86,7 @@ block0(v0: i64x2):
8686
; pushq %rbp
8787
; movq %rsp, %rbp
8888
; block0:
89-
; vpextrq $1, %xmm0, %rax
89+
; vpextrq $0x1, %xmm0, %rax
9090
; movq %rbp, %rsp
9191
; popq %rbp
9292
; retq
@@ -162,7 +162,7 @@ block0(v0: i8x16, v1: i64):
162162
; pushq %rbp
163163
; movq %rsp, %rbp
164164
; block0:
165-
; vpextrb $0, %xmm0, 0(%rdi)
165+
; vpextrb $0x0, %xmm0, (%rdi)
166166
; movq %rbp, %rsp
167167
; popq %rbp
168168
; retq
@@ -188,7 +188,7 @@ block0(v0: i16x8, v1: i64):
188188
; pushq %rbp
189189
; movq %rsp, %rbp
190190
; block0:
191-
; vpextrw $0, %xmm0, 0(%rdi)
191+
; vpextrw $0x0, %xmm0, (%rdi)
192192
; movq %rbp, %rsp
193193
; popq %rbp
194194
; retq
@@ -214,7 +214,7 @@ block0(v0: i32x4, v1: i64):
214214
; pushq %rbp
215215
; movq %rsp, %rbp
216216
; block0:
217-
; vpextrd $0, %xmm0, 0(%rdi)
217+
; vpextrd $0x0, %xmm0, (%rdi)
218218
; movq %rbp, %rsp
219219
; popq %rbp
220220
; retq
@@ -266,7 +266,7 @@ block0(v0: i64x2, v1: i64):
266266
; pushq %rbp
267267
; movq %rsp, %rbp
268268
; block0:
269-
; vpextrq $0, %xmm0, 0(%rdi)
269+
; vpextrq $0x0, %xmm0, (%rdi)
270270
; movq %rbp, %rsp
271271
; popq %rbp
272272
; retq

0 commit comments

Comments
 (0)