Skip to content

Commit b63392e

Browse files
DavidMansell authored and gunes-arm committed
feat: Add SME GEMM and GEMV kernels.
Add SME versions of most SME2 kernels.

As a side effect, synchronize with the latest upstream arm_gemm repo. This includes deleting a substantial amount of dead files that weren't being used for anything.

arm_gemm include files are now stored in a slightly different place, so various other places that refer to these are updated to match.

Partially Resolves: COMPMID-8788, ARMCL-1239
Signed-off-by: David Mansell <David.Mansell@arm.com>
Change-Id: Iaa6f5a8caa4109098b8b5f9173df0909a4e940d6
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
1 parent 902f9a7 commit b63392e

File tree

663 files changed

+125147
-100273
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

663 files changed

+125147
-100273
lines changed

.pre-commit-config.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2023-2025 Arm Limited.
1+
# Copyright (c) 2023-2026 Arm Limited.
22
#
33
# SPDX-License-Identifier: MIT
44
#
@@ -31,6 +31,8 @@ exclude: |
3131
src/core/NEON/kernels/convolution/.*|
3232
src/core/NEON/kernels/arm_gemm/.*|
3333
src/core/NEON/kernels/arm_conv/.*|
34+
src/cpu/kernels/assembly/arm_gemm/.*|
35+
src/cpu/kernels/assembly/arm_common/.*|
3436
third_party/.*|
3537
LICENSES/.*
3638
)$
@@ -88,7 +90,7 @@ repos:
8890
name: Fix header guards in ACL
8991
stages: [pre-commit]
9092
language: python
91-
entry: python ./scripts/check_header_guards.py --extensions=h,hh,hpp,inl --comment_style=double_slash --prefix=ACL --exclude=src/core/NEON/kernels/convolution/,src/core/NEON/kernels/arm_gemm/,src/core/NEON/kernels/arm_conv/,include/,third_party/ --add_extension
93+
entry: python ./scripts/check_header_guards.py --extensions=h,hh,hpp,inl --comment_style=double_slash --prefix=ACL --exclude=src/core/NEON/kernels/convolution/,src/core/NEON/kernels/arm_gemm/,src/core/NEON/kernels/arm_conv/,include/,third_party/,src/cpu/kernels/assembly/arm_gemm,src/cpu/kernels/assembly/arm_common --add_extension
9294
pass_filenames: true
9395
- id: pre-push-check
9496
name: Commit Message Check

Android.bp

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright © 2020-2025 Arm Ltd. All rights reserved.
2+
// Copyright © 2020-2026 Arm Ltd. All rights reserved.
33
// SPDX-License-Identifier: MIT
44
//
55

@@ -345,7 +345,6 @@ cc_library_static {
345345
"src/core/NEON/kernels/arm_gemm/mergeresults-fp16.cpp",
346346
"src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp",
347347
"src/core/NEON/kernels/arm_gemm/mergeresults.cpp",
348-
"src/core/NEON/kernels/arm_gemm/misc-sve.cpp",
349348
"src/core/NEON/kernels/arm_gemm/misc.cpp",
350349
"src/core/NEON/kernels/arm_gemm/quantized-fp16.cpp",
351350
"src/core/NEON/kernels/arm_gemm/quantized.cpp",
@@ -1286,7 +1285,6 @@ cc_library_static {
12861285
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp",
12871286
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp",
12881287
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp",
1289-
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp",
12901288
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp",
12911289
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp",
12921290
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp",
@@ -1300,7 +1298,6 @@ cc_library_static {
13001298
"src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp",
13011299
"src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp",
13021300
"src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp",
1303-
"src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp",
13041301
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp",
13051302
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp",
13061303
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp",
@@ -1312,7 +1309,6 @@ cc_library_static {
13121309
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
13131310
"src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp",
13141311
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp",
1315-
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp16_mla_16VL/generic.cpp",
13161312
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp16fp32fp16_dot_16VL/generic.cpp",
13171313
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp",
13181314
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp",
@@ -1342,9 +1338,37 @@ cc_library_static {
13421338
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp",
13431339
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp",
13441340
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp",
1341+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_bf16fp32_dot_8VL/generic.cpp",
1342+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_fp16_mla_8VL/generic.cpp",
1343+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_fp16fp32fp16_mla_8VL_rhs2VL/generic.cpp",
1344+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_fp32_mla_8VL/generic.cpp",
1345+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_fp32bf16fp32_dot_8VL/generic.cpp",
1346+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_s8qa_dot_8VL/generic.cpp",
1347+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_u8qa_dot_8VL/generic.cpp",
1348+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_bf16fp32_mopa_1VLx4VL/generic.cpp",
1349+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_bf16fp32_mopa_2VLx2VL/generic.cpp",
1350+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_bf16fp32_mopa_4VLx1VL/generic.cpp",
1351+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32_mopa_1VLx4VL/generic.cpp",
1352+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32_mopa_2VLx2VL/generic.cpp",
1353+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32_mopa_4VLx1VL/generic.cpp",
1354+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL/generic.cpp",
1355+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL/generic.cpp",
1356+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL/generic.cpp",
13451357
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp32_mopa_1VLx4VL/generic.cpp",
13461358
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp",
13471359
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp32_mopa_4VLx1VL/generic.cpp",
1360+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8q_mopa_1VLx4VL/generic.cpp",
1361+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8q_mopa_2VLx2VL/generic.cpp",
1362+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8q_mopa_4VLx1VL/generic.cpp",
1363+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8qfp32_mopa_1VLx4VL/generic.cpp",
1364+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8qfp32_mopa_2VLx2VL/generic.cpp",
1365+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8qfp32_mopa_4VLx1VL/generic.cpp",
1366+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8s32_mopa_1VLx4VL/generic.cpp",
1367+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8s32_mopa_2VLx2VL/generic.cpp",
1368+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8s32_mopa_4VLx1VL/generic.cpp",
1369+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp",
1370+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp",
1371+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp",
13481372
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp",
13491373
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp",
13501374
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp",

REUSE.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: 2024-2025 Arm Limited
1+
# SPDX-FileCopyrightText: 2024-2026 Arm Limited
22
#
33
# SPDX-License-Identifier: MIT
44
#
@@ -29,7 +29,7 @@ SPDX-License-Identifier = "MIT"
2929

3030
[[annotations]]
3131
path = ["filelist.json"]
32-
SPDX-FileCopyrightText = "2021-2025 Arm Limited"
32+
SPDX-FileCopyrightText = "2021-2026 Arm Limited"
3333
SPDX-License-Identifier = "MIT"
3434

3535
[[annotations]]

SConscript

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/python
22
# -*- coding: utf-8 -*-
33

4-
# Copyright (c) 2016-2025 Arm Limited.
4+
# Copyright (c) 2016-2026 Arm Limited.
55
#
66
# SPDX-License-Identifier: MIT
77
#
@@ -650,6 +650,8 @@ misa_lib_files_neon_fp16 = []
650650
misa_lib_files_sve_fp16 = []
651651
misa_lib_files_sve2_fp16 = []
652652

653+
arm_compute_env.Append(CPPPATH = ["src/cpu/kernels/assembly/"])
654+
653655
if env['neon']:
654656
# build winograd/depthwise sources for either v7a / v8a
655657
arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/arm_gemm",

filelist.json

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,7 +1702,6 @@
17021702
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8s8s32_dot_6x16/generic.cpp",
17031703
"src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8s8s32_mmla_6x16/generic.cpp",
17041704
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp",
1705-
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp",
17061705
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp",
17071706
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp",
17081707
"src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp",
@@ -1760,10 +1759,6 @@
17601759
"src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/a55r1.cpp",
17611760
"src/core/NEON/kernels/arm_gemm/kernels/a32_sgemm_8x6/generic.cpp"
17621761
],
1763-
"estate64": [
1764-
"src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp"
1765-
1766-
],
17671762
"fixed_format_kernels": [
17681763
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16/generic.cpp",
17691764
"src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16/generic.cpp",
@@ -1778,7 +1773,6 @@
17781773
"common": [
17791774
"src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp",
17801775
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp",
1781-
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp16_mla_16VL/generic.cpp",
17821776
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp16fp32fp16_dot_16VL/generic.cpp",
17831777
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp",
17841778
"src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp",
@@ -1808,9 +1802,37 @@
18081802
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp",
18091803
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp",
18101804
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp",
1805+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_bf16fp32_dot_8VL/generic.cpp",
1806+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_fp16fp32fp16_mla_8VL_rhs2VL/generic.cpp",
1807+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_fp16_mla_8VL/generic.cpp",
1808+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_fp32bf16fp32_dot_8VL/generic.cpp",
1809+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_fp32_mla_8VL/generic.cpp",
1810+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_s8qa_dot_8VL/generic.cpp",
1811+
"src/core/NEON/kernels/arm_gemm/kernels/sme_gemv_u8qa_dot_8VL/generic.cpp",
1812+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_bf16fp32_mopa_1VLx4VL/generic.cpp",
1813+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_bf16fp32_mopa_2VLx2VL/generic.cpp",
1814+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_bf16fp32_mopa_4VLx1VL/generic.cpp",
1815+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32_mopa_1VLx4VL/generic.cpp",
1816+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32_mopa_2VLx2VL/generic.cpp",
1817+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32_mopa_4VLx1VL/generic.cpp",
1818+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL/generic.cpp",
1819+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL/generic.cpp",
1820+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL/generic.cpp",
18111821
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp32_mopa_1VLx4VL/generic.cpp",
18121822
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp",
18131823
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_fp32_mopa_4VLx1VL/generic.cpp",
1824+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8q_mopa_1VLx4VL/generic.cpp",
1825+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8q_mopa_2VLx2VL/generic.cpp",
1826+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8q_mopa_4VLx1VL/generic.cpp",
1827+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8qfp32_mopa_1VLx4VL/generic.cpp",
1828+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8qfp32_mopa_2VLx2VL/generic.cpp",
1829+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8qfp32_mopa_4VLx1VL/generic.cpp",
1830+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8s32_mopa_1VLx4VL/generic.cpp",
1831+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8s32_mopa_2VLx2VL/generic.cpp",
1832+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_s8s32_mopa_4VLx1VL/generic.cpp",
1833+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp",
1834+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp",
1835+
"src/core/NEON/kernels/arm_gemm/kernels/sme_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp",
18141836
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp",
18151837
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL/generic.cpp",
18161838
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp",
@@ -1857,8 +1879,7 @@
18571879
"src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp",
18581880
"src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8s8s32_mmla_8x3VL/generic.cpp",
18591881
"src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp",
1860-
"src/core/NEON/kernels/arm_gemm/transform-sve.cpp",
1861-
"src/core/NEON/kernels/arm_gemm/misc-sve.cpp"
1882+
"src/core/NEON/kernels/arm_gemm/transform-sve.cpp"
18621883
],
18631884
"fixed_format_kernels": [
18641885
"src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp",

scripts/generate_build_files.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/python
22
# -*- coding: utf-8 -*-
33

4-
# Copyright (c) 2023-2025 Arm Limited.
4+
# Copyright (c) 2023-2026 Arm Limited.
55
#
66
# SPDX-License-Identifier: MIT
77
#
@@ -95,7 +95,7 @@ def resolve_operator_dependencies(filelist, operators, backend=''):
9595
return resolved_operators
9696

9797
def get_template_header():
98-
return """# Copyright (c) 2023-2025 Arm Limited.
98+
return """# Copyright (c) 2023-2026 Arm Limited.
9999
#
100100
# SPDX-License-Identifier: MIT
101101
#

0 commit comments

Comments
 (0)