Skip to content

Commit 902f9a7

Browse files
DavidMansell authored and gunes-arm committed
feat: Add fine-grained SME feature flags.
The ARM architecture defines distinct features for each outer product data type. Add these features, and detection code for bare metal (via sysregs), Linux (via HWCAP2) and macOS (via sysctl). For Windows, assume that anything supporting SME also supports these features.

Signed-off-by: David Mansell <David.Mansell@arm.com>
Change-Id: Ia236714d51bb935fa366553dc9ac193d50c8d18a
1 parent f98fc36 commit 902f9a7

File tree

7 files changed

+146
-40
lines changed

7 files changed

+146
-40
lines changed

arm_compute/core/CPP/CPPTypes.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2022, 2024-2025 Arm Limited.
2+
* Copyright (c) 2017-2022, 2024-2026 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -148,6 +148,26 @@ class CPUInfo final
148148
* @return true if the cpu supports sme2, false otherwise
149149
*/
150150
bool has_sme2() const;
151+
/** Checks if the cpu model supports sme_i8i32.
152+
*
153+
* @return true if the cpu supports sme_i8i32, false otherwise
154+
*/
155+
bool has_sme_i8i32() const;
156+
/** Checks if the cpu model supports sme_f16f32.
157+
*
158+
* @return true if the cpu supports sme_f16f32, false otherwise
159+
*/
160+
bool has_sme_f16f32() const;
161+
/** Checks if the cpu model supports sme_f32f32.
162+
*
163+
* @return true if the cpu supports sme_f32f32, false otherwise
164+
*/
165+
bool has_sme_f32f32() const;
166+
/** Checks if the cpu model supports sme_b16f32.
167+
*
168+
* @return true if the cpu supports sme_b16f32, false otherwise
169+
*/
170+
bool has_sme_b16f32() const;
151171
/** Gets the cpu model for a given cpuid.
152172
*
153173
* @param[in] cpuid the id of the cpu core to be retrieved,

src/common/cpuinfo/CpuInfo.cpp

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2025 Arm Limited.
2+
* Copyright (c) 2021-2026 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -344,6 +344,17 @@ uint64_t get_sve_feature_reg()
344344
: "x3");
345345
return svefr0;
346346
}
347+
348+
uint64_t get_sme_feature_reg()
349+
{
350+
uint64_t smefr0 = 0;
351+
__asm __volatile(".inst 0xd53804a3 // mrs x3, ID_AA64SMFR0_EL1\n"
352+
"MOV %0, X3"
353+
: "=r"(smefr0)
354+
:
355+
: "x3");
356+
return smefr0;
357+
}
347358
#endif /* defined(BARE_METAL) && defined(__aarch64__) */
348359
} // namespace
349360

@@ -405,7 +416,7 @@ CpuInfo CpuInfo::build()
405416
__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && !defined(__QNX__) && (defined(__arm__) || defined(__aarch64__)) */
406417

407418
// Assume single CPU in bare metal mode. Just read the ID register and feature bits directly.
408-
uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0;
419+
uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, smefr0 = 0, midr = 0;
409420
ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1);
410421
ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1);
411422
ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1);
@@ -415,8 +426,12 @@ CpuInfo CpuInfo::build()
415426
{
416427
svefr0 = get_sve_feature_reg();
417428
}
429+
if ((pfr1 >> 24) & 0xf)
430+
{
431+
smefr0 = get_sme_feature_reg();
432+
}
418433

419-
CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr);
434+
CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, smefr0, midr);
420435
std::vector<CpuModel> cpus_model(1, midr_to_model(midr));
421436
CpuInfo info(isa, cpus_model);
422437
return info;
@@ -425,13 +440,17 @@ CpuInfo CpuInfo::build()
425440
int ncpus = get_hw_capability("hw.perflevel0.logicalcpu");
426441
CpuIsaInfo isainfo;
427442
std::vector<CpuModel> cpus_model(ncpus);
428-
isainfo.neon = get_hw_capability("hw.optional.neon");
429-
isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16");
430-
isainfo.dot = get_hw_capability("hw.optional.arm.FEAT_DotProd");
431-
isainfo.bf16 = get_hw_capability("hw.optional.arm.FEAT_BF16");
432-
isainfo.i8mm = get_hw_capability("hw.optional.arm.FEAT_I8MM");
433-
isainfo.sme = get_hw_capability("hw.optional.arm.FEAT_SME");
434-
isainfo.sme2 = get_hw_capability("hw.optional.arm.FEAT_SME2");
443+
isainfo.neon = get_hw_capability("hw.optional.neon");
444+
isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16");
445+
isainfo.dot = get_hw_capability("hw.optional.arm.FEAT_DotProd");
446+
isainfo.bf16 = get_hw_capability("hw.optional.arm.FEAT_BF16");
447+
isainfo.i8mm = get_hw_capability("hw.optional.arm.FEAT_I8MM");
448+
isainfo.sme = get_hw_capability("hw.optional.arm.FEAT_SME");
449+
isainfo.sme_f32f32 = get_hw_capability("hw.optional.arm.SME_F32F32");
450+
isainfo.sme_b16f32 = get_hw_capability("hw.optional.arm.SME_B16F32");
451+
isainfo.sme_f16f32 = get_hw_capability("hw.optional.arm.SME_F16F32");
452+
isainfo.sme_i8i32 = get_hw_capability("hw.optional.arm.SME_I8I32");
453+
isainfo.sme2 = get_hw_capability("hw.optional.arm.FEAT_SME2");
435454
CpuInfo info(isainfo, cpus_model);
436455
return info;
437456
#elif defined(__aarch64__) && defined(_WIN64) /* #elif defined(__aarch64__) && defined(__APPLE__) */
@@ -441,6 +460,13 @@ CpuInfo CpuInfo::build()
441460
isainfo.sve = IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE);
442461
isainfo.sve2 = IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE);
443462
isainfo.sme = IsProcessorFeaturePresent(PF_ARM_SME_INSTRUCTIONS_AVAILABLE);
463+
464+
// These features are implied by FEAT_SME
465+
isainfo.sme_f32f32 = isainfo.sme;
466+
isainfo.sme_b16f32 = isainfo.sme;
467+
isainfo.sme_f16f32 = isainfo.sme;
468+
isainfo.sme_i8i32 = isainfo.sme;
469+
444470
isainfo.sme2 = IsProcessorFeaturePresent(PF_ARM_SME2_INSTRUCTIONS_AVAILABLE);
445471
isainfo.fhm = false; // constant not found
446472

src/common/cpuinfo/CpuInfo.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2022, 2024-2025 Arm Limited.
2+
* Copyright (c) 2021-2022, 2024-2026 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -83,6 +83,22 @@ class CpuInfo
8383
{
8484
return _isa.sme2;
8585
}
86+
bool has_sme_i8i32() const
87+
{
88+
return _isa.sme_i8i32;
89+
}
90+
bool has_sme_f16f32() const
91+
{
92+
return _isa.sme_f16f32;
93+
}
94+
bool has_sme_f32f32() const
95+
{
96+
return _isa.sme_f32f32;
97+
}
98+
bool has_sme_b16f32() const
99+
{
100+
return _isa.sme_b16f32;
101+
}
86102
bool has_fp16() const
87103
{
88104
return _isa.fp16;

src/common/cpuinfo/CpuIsaInfo.cpp

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2022, 2024-2025 Arm Limited.
2+
* Copyright (c) 2021-2022, 2024-2026 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -32,20 +32,24 @@
3232
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_NEON (1 << 12)
3333

3434
/* Arm64 Feature flags */
35-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD (1 << 1)
36-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9)
37-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10)
38-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP (1 << 20)
39-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 22)
40-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDFHM (1 << 23)
41-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2 (1 << 1)
42-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM (1 << 9)
43-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM (1 << 10)
44-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16 (1 << 12)
45-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM (1 << 13)
46-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16 (1 << 14)
47-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME (1 << 23)
48-
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME2 (1ULL << 37)
35+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMD (1 << 1)
36+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9)
37+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10)
38+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP (1 << 20)
39+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 22)
40+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDFHM (1 << 23)
41+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2 (1 << 1)
42+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM (1 << 9)
43+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM (1 << 10)
44+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16 (1 << 12)
45+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM (1 << 13)
46+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16 (1 << 14)
47+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME (1 << 23)
48+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_I8I32 (1 << 26)
49+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_F16F32 (1 << 27)
50+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_B16F32 (1 << 28)
51+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_F32F32 (1 << 29)
52+
#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME2 (1ULL << 37)
4953

5054
namespace arm_compute
5155
{
@@ -84,10 +88,14 @@ void decode_hwcaps(CpuIsaInfo &isa, const uint64_t hwcaps, const uint64_t hwcaps
8488
isa.svebf16 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16);
8589

8690
// Instruction extensions
87-
isa.dot = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP);
88-
isa.i8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM);
89-
isa.svei8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM);
90-
isa.svef32mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM);
91+
isa.dot = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDDP);
92+
isa.i8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM);
93+
isa.sme_b16f32 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_B16F32);
94+
isa.sme_f16f32 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_F16F32);
95+
isa.sme_f32f32 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_F32F32);
96+
isa.sme_i8i32 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME_I8I32);
97+
isa.svei8mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEI8MM);
98+
isa.svef32mm = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEF32MM);
9199
}
92100
#else /* defined(__aarch64__) */
93101
void decode_hwcaps(CpuIsaInfo &isa, const uint64_t hwcaps, const uint64_t hwcaps2)
@@ -101,7 +109,8 @@ void decode_regs(CpuIsaInfo &isa,
101109
const uint64_t isar1,
102110
const uint64_t pfr0,
103111
const uint64_t pfr1,
104-
const uint64_t svefr0)
112+
const uint64_t svefr0,
113+
const uint64_t smefr0)
105114
{
106115
auto is_supported = [](uint64_t feature_reg, uint8_t feature_pos) -> bool
107116
{ return ((feature_reg >> feature_pos) & 0xf); };
@@ -124,6 +133,12 @@ void decode_regs(CpuIsaInfo &isa,
124133
isa.i8mm = is_supported(isar1, 48);
125134
isa.svei8mm = is_supported(svefr0, 44);
126135
isa.svef32mm = is_supported(svefr0, 52);
136+
137+
// SME features
138+
isa.sme_b16f32 = (smefr0 & (1ULL << 34));
139+
isa.sme_f16f32 = (smefr0 & (1ULL << 35));
140+
isa.sme_f32f32 = (smefr0 & (1ULL << 32));
141+
isa.sme_i8i32 = (((smefr0 >> 36) & 0xF) == 0xF);
127142
}
128143

129144
/** Handle features from allow-listed models in case of problematic kernels
@@ -156,12 +171,12 @@ CpuIsaInfo init_cpu_isa_from_hwcaps(uint64_t hwcaps, uint64_t hwcaps2, uint32_t
156171
return isa;
157172
}
158173

159-
CpuIsaInfo
160-
init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t midr)
174+
CpuIsaInfo init_cpu_isa_from_regs(
175+
uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t smefr0, uint64_t midr)
161176
{
162177
CpuIsaInfo isa;
163178

164-
decode_regs(isa, isar0, isar1, pfr0, pfr1, svefr0);
179+
decode_regs(isa, isar0, isar1, pfr0, pfr1, svefr0, smefr0);
165180

166181
const CpuModel model = midr_to_model(midr);
167182
allowlisted_model_features(isa, model);

src/common/cpuinfo/CpuIsaInfo.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2022, 2024-2025 Arm Limited.
2+
* Copyright (c) 2021-2022, 2024-2026 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -52,6 +52,10 @@ struct CpuIsaInfo
5252
/* Instruction support */
5353
bool dot{false};
5454
bool i8mm{false};
55+
bool sme_b16f32{false};
56+
bool sme_f16f32{false};
57+
bool sme_f32f32{false};
58+
bool sme_i8i32{false};
5559
bool svei8mm{false};
5660
bool svef32mm{false};
5761
};
@@ -73,12 +77,13 @@ CpuIsaInfo init_cpu_isa_from_hwcaps(uint64_t hwcaps, uint64_t hwcaps2, uint32_t
7377
* @param[in] pfr0 Value of Processor Feature Register 0 (ID_AA64PFR0_EL1)
7478
* @param[in] pfr1 Value of Processor Feature Register 1 (ID_AA64PFR1_EL1)
7579
* @param[in] svefr0 Value of SVE feature ID register 0 (ID_AA64ZFR0_EL1)
80+
* @param[in] smefr0 Value of SME feature ID register 0 (ID_AA64SMFR0_EL1)
7681
* @param[in] midr Value of Main ID Register (MIDR)
7782
*
7883
* @return CpuIsaInfo A populated ISA feature structure
7984
*/
80-
CpuIsaInfo
81-
init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t midr);
85+
CpuIsaInfo init_cpu_isa_from_regs(
86+
uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t smefr0, uint64_t midr);
8287
} // namespace cpuinfo
8388
} // namespace arm_compute
8489

src/core/CPP/CPPTypes.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018-2022, 2024-2025 Arm Limited.
2+
* Copyright (c) 2018-2022, 2024-2026 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -117,6 +117,26 @@ bool CPUInfo::has_sme2() const
117117
return _impl->info.has_sme2();
118118
}
119119

120+
bool CPUInfo::has_sme_i8i32() const
121+
{
122+
return _impl->info.has_sme_i8i32();
123+
}
124+
125+
bool CPUInfo::has_sme_f16f32() const
126+
{
127+
return _impl->info.has_sme_f16f32();
128+
}
129+
130+
bool CPUInfo::has_sme_f32f32() const
131+
{
132+
return _impl->info.has_sme_f32f32();
133+
}
134+
135+
bool CPUInfo::has_sme_b16f32() const
136+
{
137+
return _impl->info.has_sme_b16f32();
138+
}
139+
120140
CPUModel CPUInfo::get_cpu_model() const
121141
{
122142
return _impl->info.cpu_model();

tests/main.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2025 Arm Limited.
2+
* Copyright (c) 2017-2026 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -253,6 +253,10 @@ int main(int argc, char **argv)
253253
p->print_entry("cpu_has_svei8mm", support::cpp11::to_string(cpu_info.has_svei8mm()));
254254
p->print_entry("cpu_has_svebf16", support::cpp11::to_string(cpu_info.has_svebf16()));
255255
p->print_entry("cpu_has_sme", support::cpp11::to_string(cpu_info.has_sme()));
256+
p->print_entry("cpu_has_sme_f32f32", support::cpp11::to_string(cpu_info.has_sme_f32f32()));
257+
p->print_entry("cpu_has_sme_f16f32", support::cpp11::to_string(cpu_info.has_sme_f16f32()));
258+
p->print_entry("cpu_has_sme_b16f32", support::cpp11::to_string(cpu_info.has_sme_b16f32()));
259+
p->print_entry("cpu_has_sme_i8i32", support::cpp11::to_string(cpu_info.has_sme_i8i32()));
256260
p->print_entry("cpu_has_sme2", support::cpp11::to_string(cpu_info.has_sme2()));
257261
p->print_entry("cpu_has_fp16", support::cpp11::to_string(cpu_info.has_fp16()));
258262
p->print_entry("cpu_has_bf16", support::cpp11::to_string(cpu_info.has_bf16()));

0 commit comments

Comments
 (0)