|
7 | 7 | #include <stdlib.h> |
8 | 8 | #include <inttypes.h> |
9 | 9 | #include <math.h> |
| 10 | + |
| 11 | +#if defined(DYNAMIC_ARCH) |
| 12 | +#define COMBINE(a,b) a ## b |
| 13 | +#define COMBINE2(a,b) COMBINE(a,b) |
| 14 | +#define SGEMM_PREPROCESS_BASE sgemm_direct_sme1_preprocess |
| 15 | +#define SGEMM_PREPROCESS COMBINE2(SGEMM_PREPROCESS_BASE,TS) |
| 16 | +#define SGEMM_DIRECT2X2_BASE sgemm_direct_alpha_beta_sme1_2VLx2VL |
| 17 | +#define SGEMM_DIRECT2X2 COMBINE2(SGEMM_DIRECT2X2_BASE,TS) |
| 18 | +#else |
| 19 | +#define SGEMM_PREPROCESS sgemm_direct_sme1_preprocess |
| 20 | +#define SGEMM_DIRECT2X2 sgemm_direct_alpha_beta_sme1_2VLx2VL |
| 21 | +#endif |
10 | 22 | #if defined(HAVE_SME) |
11 | 23 |
|
12 | 24 | #if defined(__ARM_FEATURE_SME) && defined(__clang__) && __clang_major__ >= 16 |
13 | 25 | #include <arm_sme.h> |
14 | 26 | #endif |
15 | 27 |
|
16 | 28 | /* Function prototypes */ |
17 | | -extern void sgemm_direct_sme1_preprocess(uint64_t nbr, uint64_t nbc,\ |
18 | | - const float * restrict a, float * a_mod) __asm__("sgemm_direct_sme1_preprocess"); |
| 29 | +extern void SGEMM_PREPROCESS(uint64_t nbr, uint64_t nbc,\ |
| 30 | + const float * restrict a, float * a_mod) ; |
19 | 31 |
|
20 | 32 | /* Function Definitions */ |
21 | 33 | static uint64_t sve_cntw() { |
@@ -261,8 +273,8 @@ void CNAME (BLASLONG N, BLASLONG K, float alpha, float * __restrict A, \ |
261 | 273 | /* Pre-process both input matrices (A and B) to make them suitable for |
262 | 274 | the matrix sum-of-outer-products calculation |
263 | 275 | */ |
264 | | - sgemm_direct_sme1_preprocess(N, K, A, A_mod); |
265 | | - sgemm_direct_sme1_preprocess(N, K, B, B_mod); |
| 276 | + SGEMM_PREPROCESS(N, K, A, A_mod); |
| 277 | + SGEMM_PREPROCESS(N, K, B, B_mod); |
266 | 278 | asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", |
267 | 279 | "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", |
268 | 280 | "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", |
|
0 commit comments