11/*
2- * Copyright (c) 2021-2022, 2024-2025 Arm Limited.
2+ * Copyright (c) 2021-2022, 2024-2026 Arm Limited.
33 *
44 * SPDX-License-Identifier: MIT
55 *
3232#define ARM_COMPUTE_CPU_FEATURE_HWCAP_NEON (1 << 12 )
3333
3434/* Arm64 Feature flags */
35- #define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD (1 << 1 )
36- #define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9 )
37- #define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10 )
38- #define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP (1 << 20 )
39- #define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 22 )
40- #define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDFHM (1 << 23 )
41- #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2 (1 << 1 )
42- #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM (1 << 9 )
43- #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM (1 << 10 )
44- #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16 (1 << 12 )
45- #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM (1 << 13 )
46- #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16 (1 << 14 )
47- #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME (1 << 23 )
48- #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME2 (1ULL << 37 )
35+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD (1 << 1 )
36+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9 )
37+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10 )
38+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP (1 << 20 )
39+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 22 )
40+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDFHM (1 << 23 )
41+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2 (1 << 1 )
42+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM (1 << 9 )
43+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM (1 << 10 )
44+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16 (1 << 12 )
45+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM (1 << 13 )
46+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16 (1 << 14 )
47+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME (1 << 23 )
48+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_I8I32 (1 << 26 )
49+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_F16F32 (1 << 27 )
50+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_B16F32 (1 << 28 )
51+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_F32F32 (1 << 29 )
52+ #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME2 (1ULL << 37 )
4953
5054namespace arm_compute
5155{
@@ -84,10 +88,14 @@ void decode_hwcaps(CpuIsaInfo &isa, const uint64_t hwcaps, const uint64_t hwcaps
8488 isa.svebf16 = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16);
8589
8690 // Instruction extensions
87- isa.dot = is_feature_supported (hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP);
88- isa.i8mm = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM);
89- isa.svei8mm = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM);
90- isa.svef32mm = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM);
91+ isa.dot = is_feature_supported (hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP);
92+ isa.i8mm = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM);
93+ isa.sme_b16f32 = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_B16F32);
94+ isa.sme_f16f32 = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_F16F32);
95+ isa.sme_f32f32 = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_F32F32);
96+ isa.sme_i8i32 = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_I8I32);
97+ isa.svei8mm = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM);
98+ isa.svef32mm = is_feature_supported (hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM);
9199}
92100#else /* defined(__aarch64__) */
93101void decode_hwcaps (CpuIsaInfo &isa, const uint64_t hwcaps, const uint64_t hwcaps2)
@@ -101,7 +109,8 @@ void decode_regs(CpuIsaInfo &isa,
101109 const uint64_t isar1,
102110 const uint64_t pfr0,
103111 const uint64_t pfr1,
104- const uint64_t svefr0)
112+ const uint64_t svefr0,
113+ const uint64_t smefr0)
105114{
106115 auto is_supported = [](uint64_t feature_reg, uint8_t feature_pos) -> bool
107116 { return ((feature_reg >> feature_pos) & 0xf ); };
@@ -124,6 +133,12 @@ void decode_regs(CpuIsaInfo &isa,
124133 isa.i8mm = is_supported (isar1, 48 );
125134 isa.svei8mm = is_supported (svefr0, 44 );
126135 isa.svef32mm = is_supported (svefr0, 52 );
136+
137+ // SME features
138+ isa.sme_b16f32 = (smefr0 & (1ULL << 34 ));
139+ isa.sme_f16f32 = (smefr0 & (1ULL << 35 ));
140+ isa.sme_f32f32 = (smefr0 & (1ULL << 32 ));
141+ isa.sme_i8i32 = (((smefr0 >> 36 ) & 0xF ) == 0xF );
127142}
128143
129144/** Handle features from allow-listed models in case of problematic kernels
@@ -156,12 +171,12 @@ CpuIsaInfo init_cpu_isa_from_hwcaps(uint64_t hwcaps, uint64_t hwcaps2, uint32_t
156171 return isa;
157172}
158173
159- CpuIsaInfo
160- init_cpu_isa_from_regs ( uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t midr)
174+ CpuIsaInfo init_cpu_isa_from_regs (
175+ uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t smefr0 , uint64_t midr)
161176{
162177 CpuIsaInfo isa;
163178
164- decode_regs (isa, isar0, isar1, pfr0, pfr1, svefr0);
179+ decode_regs (isa, isar0, isar1, pfr0, pfr1, svefr0, smefr0 );
165180
166181 const CpuModel model = midr_to_model (midr);
167182 allowlisted_model_features (isa, model);
0 commit comments