Skip to content

Commit db18ecf

Browse files
committed
Require avxvnni for avx10.2
1 parent 859951e commit db18ecf

2 files changed

Lines changed: 42 additions & 34 deletions

File tree

compiler/rustc_target/src/target_features.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,11 @@ static X86_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
392392
"avx512vpopcntdq",
393393
],
394394
),
395-
("avx10.2", Unstable(sym::avx10_target_feature), &["avx10.1"]),
395+
(
396+
"avx10.2",
397+
Unstable(sym::avx10_target_feature),
398+
&["avx10.1", "avxvnni", "avxvnniint8", "avxvnniint16"],
399+
),
396400
("avx512bf16", Stable, &["avx512bw"]),
397401
("avx512bitalg", Stable, &["avx512bw"]),
398402
("avx512bw", Stable, &["avx512f"]),

library/std_detect/src/detect/os/x86.rs

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,28 @@ pub(crate) fn detect_features() -> cache::Initializer {
202202
// Test `XCR0.APX[19]` with the mask `0b1000_0000_0000_0000_0000 == 0x80000`
203203
let os_apx_support = xcr0 & 0x80000 == 0x80000;
204204

205+
if os_amx_support {
206+
enable(extended_features_edx, 24, Feature::amx_tile);
207+
enable(extended_features_edx, 25, Feature::amx_int8);
208+
enable(extended_features_edx, 22, Feature::amx_bf16);
209+
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
210+
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
211+
212+
if max_basic_leaf >= 0x1e {
213+
let CpuidResult { eax: amx_feature_flags_eax, .. } =
214+
__cpuid_count(0x1e_u32, 1);
215+
216+
enable(amx_feature_flags_eax, 4, Feature::amx_fp8);
217+
enable(amx_feature_flags_eax, 6, Feature::amx_tf32);
218+
enable(amx_feature_flags_eax, 7, Feature::amx_avx512);
219+
enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
220+
}
221+
}
222+
223+
if os_apx_support {
224+
enable(extended_features_edx_leaf_1, 21, Feature::apxf);
225+
}
226+
205227
// Only if the OS and the CPU support saving/restoring the AVX
206228
// registers we enable `xsave` support:
207229
if os_avx_support {
@@ -236,9 +258,10 @@ pub(crate) fn detect_features() -> cache::Initializer {
236258
enable(extended_features_ebx, 5, Feature::avx2);
237259

238260
// "Short" versions of AVX512 instructions
239-
enable(extended_features_eax_leaf_1, 4, Feature::avxvnni);
240-
enable(extended_features_eax_leaf_1, 23, Feature::avxifma);
241-
enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8);
261+
let avxvnni = enable(extended_features_eax_leaf_1, 4, Feature::avxvnni);
262+
let avxvnniint8 = enable(extended_features_eax_leaf_1, 23, Feature::avxifma);
263+
let avxvnniint16 =
264+
enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8);
242265
enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert);
243266
enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16);
244267

@@ -269,37 +292,18 @@ pub(crate) fn detect_features() -> cache::Initializer {
269292
enable(extended_features_edx, 8, Feature::avx512vp2intersect);
270293
enable(extended_features_edx, 23, Feature::avx512fp16);
271294
enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16);
272-
}
273-
}
274-
275-
if os_amx_support {
276-
enable(extended_features_edx, 24, Feature::amx_tile);
277-
enable(extended_features_edx, 25, Feature::amx_int8);
278-
enable(extended_features_edx, 22, Feature::amx_bf16);
279-
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
280-
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
281-
282-
if max_basic_leaf >= 0x1e {
283-
let CpuidResult { eax: amx_feature_flags_eax, .. } =
284-
__cpuid_count(0x1e_u32, 1);
285-
286-
enable(amx_feature_flags_eax, 4, Feature::amx_fp8);
287-
enable(amx_feature_flags_eax, 6, Feature::amx_tf32);
288-
enable(amx_feature_flags_eax, 7, Feature::amx_avx512);
289-
enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
290-
}
291-
}
292-
293-
if os_apx_support {
294-
enable(extended_features_edx_leaf_1, 21, Feature::apxf);
295-
}
296295

297-
let avx10_1 = enable(extended_features_edx_leaf_1, 19, Feature::avx10_1);
298-
if avx10_1 {
299-
let CpuidResult { ebx, .. } = __cpuid(0x24);
300-
let avx10_version = ebx & 0xff;
301-
if avx10_version >= 2 {
302-
value.set(Feature::avx10_2 as u32);
296+
let avx10_1 = enable(extended_features_edx_leaf_1, 19, Feature::avx10_1);
297+
if avx10_1 {
298+
let CpuidResult { ebx, .. } = __cpuid(0x24);
299+
let avx10_version = ebx & 0xff;
300+
301+
// AVX10.2 provides masked versions of the dot-product instructions from avxvnni, avxvnniint8 and avxvnniint16,
302+
// so it does not make sense to report it when the unmasked versions are unavailable
303+
if avx10_version >= 2 && avxvnni && avxvnniint8 && avxvnniint16 {
304+
value.set(Feature::avx10_2 as u32);
305+
}
306+
}
303307
}
304308
}
305309
}

0 commit comments

Comments
 (0)