Skip to content

Commit a0bf09f

Browse files
authored
Merge branch 'OpenMathLib:develop' into fix_dyn_armv9sme
2 parents 1e48d04 + 151b742 commit a0bf09f

2 files changed

Lines changed: 22 additions & 7 deletions

File tree

.cirrus.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,8 @@ task:
5858
- export VALID_ARCHS="i386 x86_64"
5959
- xcrun --sdk macosx --show-sdk-path
6060
- xcodebuild -version
61-
- export CC=/Applications/Xcode_15.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
62-
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_15.4.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.5.sdk -arch x86_64"
61+
- export CC=/Applications/Xcode_16.3.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
62+
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_16.3.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX15.4.sdk -arch x86_64"
6363
- make TARGET=CORE2 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
6464
always:
6565
config_artifacts:
@@ -78,8 +78,8 @@ task:
7878
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
7979
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
8080
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
81-
- export CC=/Applications/Xcode_15.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
82-
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_15.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.5.sdk -arch arm64 -miphoneos-version-min=10.0"
81+
- export CC=/Applications/Xcode_16.3.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
82+
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_16.3.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS18.4.sdk -arch arm64 -miphoneos-version-min=10.0"
8383
- xcrun --sdk iphoneos --show-sdk-path
8484
- ls -l /Applications
8585
- make TARGET=ARMV8 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 CROSS=1

kernel/arm64/sgemm_direct_arm64_sme1.c

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
#include <stdlib.h>
88
#include <inttypes.h>
99
#include <math.h>
10-
1110
#if defined(HAVE_SME)
1211

1312
/* Function prototypes */
@@ -44,15 +43,31 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\
4443
m_mod = ceil((double)M/(double)vl_elms) * vl_elms;
4544

4645
float *A_mod = (float *) malloc(m_mod*K*sizeof(float));
47-
46+
47+
/* Declare all SVE predicate (p0-p15) and vector (z0-z31) registers as
48+
* clobbered, so the compiler keeps no values live in them across this point and re-reads them from memory instead.
49+
* */
50+
asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
51+
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15",
52+
"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7",
53+
"z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15",
54+
"z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23",
55+
"z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31");
56+
4857
/* Pre-process the left matrix to make it suitable for
4958
matrix sum of outer-product calculation
5059
*/
5160
sgemm_direct_sme1_preprocess(M, K, A, A_mod);
5261

5362
/* Calculate C = A*B */
5463
sgemm_direct_sme1_2VLx2VL(M, K, N, A_mod, B, R);
55-
64+
65+
asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
66+
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15",
67+
"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7",
68+
"z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15",
69+
"z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23",
70+
"z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31");
5671
free(A_mod);
5772
}
5873

0 commit comments

Comments
 (0)