Skip to content

Commit f85b70f

Browse files
Rollup merge of #153068 - sayantn:avxvnni, r=Amanieu
Require avxvnni for avx10.2

AVX10.2 supports masked (and 512-bit) versions of some intrinsics available in AVXVNNI, AVXVNNIINT8 and AVXVNNIINT16 (e.g. AVX10.2 introduces `_mm{,256,512}_{mask{z}}_dpbuud_epi32`, corresponding to `_mm{,256}_dpbuud_epi32` from AVXVNNIINT8). But Intel (being Intel) didn't (at least not in the SDM) enforce that AVX10.2 (or at least AVX10_VNNI_INT, which is a "discrete AVX10 feature" introduced alongside AVX10.2 and expected to house more such instructions) requires AVXVNNI etc.

To make this (admittedly very Intel) situation a bit better, we can just require these features from the Rust frontend.

r? @Amanieu

This also corrects a mistake in std-detect which allowed AVX10 to be enabled without AVX512F in the (odd) case when F16C or FMA are not available (we require these for AVX512F because otherwise the LLVM assembler doesn't work).
2 parents 80d0e4b + db18ecf commit f85b70f

2 files changed

Lines changed: 42 additions & 34 deletions

File tree

compiler/rustc_target/src/target_features.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,11 @@ static X86_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
392392
"avx512vpopcntdq",
393393
],
394394
),
395-
("avx10.2", Unstable(sym::avx10_target_feature), &["avx10.1"]),
395+
(
396+
"avx10.2",
397+
Unstable(sym::avx10_target_feature),
398+
&["avx10.1", "avxvnni", "avxvnniint8", "avxvnniint16"],
399+
),
396400
("avx512bf16", Stable, &["avx512bw"]),
397401
("avx512bitalg", Stable, &["avx512bw"]),
398402
("avx512bw", Stable, &["avx512f"]),

library/std_detect/src/detect/os/x86.rs

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,28 @@ pub(crate) fn detect_features() -> cache::Initializer {
202202
// Test `XCR0.APX[19]` with the mask `0b1000_0000_0000_0000_0000 == 0x80000`
203203
let os_apx_support = xcr0 & 0x80000 == 0x80000;
204204

205+
if os_amx_support {
206+
enable(extended_features_edx, 24, Feature::amx_tile);
207+
enable(extended_features_edx, 25, Feature::amx_int8);
208+
enable(extended_features_edx, 22, Feature::amx_bf16);
209+
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
210+
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
211+
212+
if max_basic_leaf >= 0x1e {
213+
let CpuidResult { eax: amx_feature_flags_eax, .. } =
214+
__cpuid_count(0x1e_u32, 1);
215+
216+
enable(amx_feature_flags_eax, 4, Feature::amx_fp8);
217+
enable(amx_feature_flags_eax, 6, Feature::amx_tf32);
218+
enable(amx_feature_flags_eax, 7, Feature::amx_avx512);
219+
enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
220+
}
221+
}
222+
223+
if os_apx_support {
224+
enable(extended_features_edx_leaf_1, 21, Feature::apxf);
225+
}
226+
205227
// Only if the OS and the CPU support saving/restoring the AVX
206228
// registers we enable `xsave` support:
207229
if os_avx_support {
@@ -236,9 +258,10 @@ pub(crate) fn detect_features() -> cache::Initializer {
236258
enable(extended_features_ebx, 5, Feature::avx2);
237259

238260
// "Short" versions of AVX512 instructions
239-
enable(extended_features_eax_leaf_1, 4, Feature::avxvnni);
240-
enable(extended_features_eax_leaf_1, 23, Feature::avxifma);
241-
enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8);
261+
let avxvnni = enable(extended_features_eax_leaf_1, 4, Feature::avxvnni);
262+
enable(extended_features_eax_leaf_1, 23, Feature::avxifma);
263+
let avxvnniint8 =
264+
enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8);
242265
enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert);
243266
let avxvnniint16 = enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16);
244267

@@ -269,37 +292,18 @@ pub(crate) fn detect_features() -> cache::Initializer {
269292
enable(extended_features_edx, 8, Feature::avx512vp2intersect);
270293
enable(extended_features_edx, 23, Feature::avx512fp16);
271294
enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16);
272-
}
273-
}
274-
275-
if os_amx_support {
276-
enable(extended_features_edx, 24, Feature::amx_tile);
277-
enable(extended_features_edx, 25, Feature::amx_int8);
278-
enable(extended_features_edx, 22, Feature::amx_bf16);
279-
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
280-
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
281-
282-
if max_basic_leaf >= 0x1e {
283-
let CpuidResult { eax: amx_feature_flags_eax, .. } =
284-
__cpuid_count(0x1e_u32, 1);
285-
286-
enable(amx_feature_flags_eax, 4, Feature::amx_fp8);
287-
enable(amx_feature_flags_eax, 6, Feature::amx_tf32);
288-
enable(amx_feature_flags_eax, 7, Feature::amx_avx512);
289-
enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
290-
}
291-
}
292-
293-
if os_apx_support {
294-
enable(extended_features_edx_leaf_1, 21, Feature::apxf);
295-
}
296295

297-
let avx10_1 = enable(extended_features_edx_leaf_1, 19, Feature::avx10_1);
298-
if avx10_1 {
299-
let CpuidResult { ebx, .. } = __cpuid(0x24);
300-
let avx10_version = ebx & 0xff;
301-
if avx10_version >= 2 {
302-
value.set(Feature::avx10_2 as u32);
296+
let avx10_1 = enable(extended_features_edx_leaf_1, 19, Feature::avx10_1);
297+
if avx10_1 {
298+
let CpuidResult { ebx, .. } = __cpuid(0x24);
299+
let avx10_version = ebx & 0xff;
300+
301+
// AVX10.2 supports masked versions of dot-product instructions available in avxvnni etc,
302+
// so it doesn't make sense to have it without the unmasked versions
303+
if avx10_version >= 2 && avxvnni && avxvnniint8 && avxvnniint16 {
304+
value.set(Feature::avx10_2 as u32);
305+
}
306+
}
303307
}
304308
}
305309
}

0 commit comments

Comments
 (0)