@@ -202,6 +202,28 @@ pub(crate) fn detect_features() -> cache::Initializer {
202202 // Test `XCR0.APX[19]` with the mask `0b1000_0000_0000_0000_0000 == 0x80000`
203203 let os_apx_support = xcr0 & 0x80000 == 0x80000 ;
204204
205+ if os_amx_support {
206+ enable ( extended_features_edx, 24 , Feature :: amx_tile) ;
207+ enable ( extended_features_edx, 25 , Feature :: amx_int8) ;
208+ enable ( extended_features_edx, 22 , Feature :: amx_bf16) ;
209+ enable ( extended_features_eax_leaf_1, 21 , Feature :: amx_fp16) ;
210+ enable ( extended_features_edx_leaf_1, 8 , Feature :: amx_complex) ;
211+
212+ if max_basic_leaf >= 0x1e {
213+ let CpuidResult { eax : amx_feature_flags_eax, .. } =
214+ __cpuid_count ( 0x1e_u32 , 1 ) ;
215+
216+ enable ( amx_feature_flags_eax, 4 , Feature :: amx_fp8) ;
217+ enable ( amx_feature_flags_eax, 6 , Feature :: amx_tf32) ;
218+ enable ( amx_feature_flags_eax, 7 , Feature :: amx_avx512) ;
219+ enable ( amx_feature_flags_eax, 8 , Feature :: amx_movrs) ;
220+ }
221+ }
222+
223+ if os_apx_support {
224+ enable ( extended_features_edx_leaf_1, 21 , Feature :: apxf) ;
225+ }
226+
205227 // Only if the OS and the CPU support saving/restoring the AVX
206228 // registers we enable `xsave` support:
207229 if os_avx_support {
@@ -236,9 +258,10 @@ pub(crate) fn detect_features() -> cache::Initializer {
236258 enable ( extended_features_ebx, 5 , Feature :: avx2) ;
237259
238260 // "Short" versions of AVX512 instructions
239- enable ( extended_features_eax_leaf_1, 4 , Feature :: avxvnni) ;
240- enable ( extended_features_eax_leaf_1, 23 , Feature :: avxifma) ;
241- enable ( extended_features_edx_leaf_1, 4 , Feature :: avxvnniint8) ;
261+ let avxvnni = enable ( extended_features_eax_leaf_1, 4 , Feature :: avxvnni) ; // result kept: one of the inputs to the AVX10.2 check further down
262+ enable ( extended_features_eax_leaf_1, 23 , Feature :: avxifma) ; // result intentionally unused: AVX-IFMA is not one of the dot-product sets AVX10.2 is gated on
263+ let avxvnniint8 =
264+ enable ( extended_features_edx_leaf_1, 4 , Feature :: avxvnniint8) ;
242265 enable ( extended_features_edx_leaf_1, 5 , Feature :: avxneconvert) ;
243266 let avxvnniint16 = enable ( extended_features_edx_leaf_1, 10 , Feature :: avxvnniint16) ; // each binding now holds the result for the feature it is named after
244267
@@ -269,37 +292,18 @@ pub(crate) fn detect_features() -> cache::Initializer {
269292 enable ( extended_features_edx, 8 , Feature :: avx512vp2intersect) ;
270293 enable ( extended_features_edx, 23 , Feature :: avx512fp16) ;
271294 enable ( extended_features_eax_leaf_1, 5 , Feature :: avx512bf16) ;
272- }
273- }
274-
275- if os_amx_support {
276- enable ( extended_features_edx, 24 , Feature :: amx_tile) ;
277- enable ( extended_features_edx, 25 , Feature :: amx_int8) ;
278- enable ( extended_features_edx, 22 , Feature :: amx_bf16) ;
279- enable ( extended_features_eax_leaf_1, 21 , Feature :: amx_fp16) ;
280- enable ( extended_features_edx_leaf_1, 8 , Feature :: amx_complex) ;
281-
282- if max_basic_leaf >= 0x1e {
283- let CpuidResult { eax : amx_feature_flags_eax, .. } =
284- __cpuid_count ( 0x1e_u32 , 1 ) ;
285-
286- enable ( amx_feature_flags_eax, 4 , Feature :: amx_fp8) ;
287- enable ( amx_feature_flags_eax, 6 , Feature :: amx_tf32) ;
288- enable ( amx_feature_flags_eax, 7 , Feature :: amx_avx512) ;
289- enable ( amx_feature_flags_eax, 8 , Feature :: amx_movrs) ;
290- }
291- }
292-
293- if os_apx_support {
294- enable ( extended_features_edx_leaf_1, 21 , Feature :: apxf) ;
295- }
296295
297- let avx10_1 = enable ( extended_features_edx_leaf_1, 19 , Feature :: avx10_1) ;
298- if avx10_1 {
299- let CpuidResult { ebx, .. } = __cpuid ( 0x24 ) ;
300- let avx10_version = ebx & 0xff ;
301- if avx10_version >= 2 {
302- value. set ( Feature :: avx10_2 as u32 ) ;
296+ let avx10_1 = enable ( extended_features_edx_leaf_1, 19 , Feature :: avx10_1) ;
297+ if avx10_1 {
298+ let CpuidResult { ebx, .. } = __cpuid ( 0x24 ) ;
299+ let avx10_version = ebx & 0xff ;
300+
301+ // AVX10.2 supports masked versions of dot-product instructions available in avxvnni etc,
302+ // so it doesn't make sense to have it without the unmasked versions
303+ if avx10_version >= 2 && avxvnni && avxvnniint8 && avxvnniint16 {
304+ value. set ( Feature :: avx10_2 as u32 ) ;
305+ }
306+ }
303307 }
304308 }
305309 }
0 commit comments