Skip to content

Commit a9a6eda

Browse files
authored
Adapt for DYNAMIC_ARCH with multiple ...preprocess symbols
1 parent 2d46f1e commit a9a6eda

1 file changed

Lines changed: 16 additions & 4 deletions

File tree

kernel/arm64/ssyr2k_direct_alpha_beta_arm64_sme1.c

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,27 @@
77
#include <stdlib.h>
88
#include <inttypes.h>
99
#include <math.h>
10+
11+
/*
 * Select the identifiers used for the external SME1 helper routines.
 *
 * With DYNAMIC_ARCH defined, SGEMM_PREPROCESS and SGEMM_DIRECT2X2 expand to
 * the base routine names with TS token-pasted onto the end; without it they
 * expand to the plain base names.
 * NOTE(review): TS is not defined in this part of the file - presumably it is
 * the per-target suffix supplied by the DYNAMIC_ARCH build; confirm.
 */
#if defined(DYNAMIC_ARCH)
12+
#define COMBINE(a,b) a ## b
13+
/* Extra level of indirection so that the arguments (the *_BASE macros and TS)
   are macro-expanded before COMBINE pastes them together. */
#define COMBINE2(a,b) COMBINE(a,b)
14+
#define SGEMM_PREPROCESS_BASE sgemm_direct_sme1_preprocess
15+
#define SGEMM_PREPROCESS COMBINE2(SGEMM_PREPROCESS_BASE,TS)
16+
#define SGEMM_DIRECT2X2_BASE sgemm_direct_alpha_beta_sme1_2VLx2VL
17+
#define SGEMM_DIRECT2X2 COMBINE2(SGEMM_DIRECT2X2_BASE,TS)
18+
#else
19+
/* Single-target build: use the unsuffixed routine names directly. */
#define SGEMM_PREPROCESS sgemm_direct_sme1_preprocess
20+
#define SGEMM_DIRECT2X2 sgemm_direct_alpha_beta_sme1_2VLx2VL
21+
#endif
1022
#if defined(HAVE_SME)
1123

1224
#if defined(__ARM_FEATURE_SME) && defined(__clang__) && __clang_major__ >= 16
1325
#include <arm_sme.h>
1426
#endif
1527

1628
/* Function prototypes */
17-
extern void sgemm_direct_sme1_preprocess(uint64_t nbr, uint64_t nbc,\
18-
const float * restrict a, float * a_mod) __asm__("sgemm_direct_sme1_preprocess");
29+
/*
 * External preprocessing routine, declared through the SGEMM_PREPROCESS
 * macro so the referenced symbol follows the DYNAMIC_ARCH name selection.
 * No explicit asm label is attached, so the symbol name is whatever the
 * macro expands to.
 * It is called below as SGEMM_PREPROCESS(N, K, A, A_mod) and
 * SGEMM_PREPROCESS(N, K, B, B_mod): `a` is the read-only input and `a_mod`
 * the output buffer.
 * NOTE(review): nbr / nbc are presumably the row and column counts of `a`;
 * confirm against the routine's definition.
 */
extern void SGEMM_PREPROCESS(uint64_t nbr, uint64_t nbc,\
30+
const float * restrict a, float * a_mod) ;
1931

2032
/* Function Definitions */
2133
static uint64_t sve_cntw() {
@@ -261,8 +273,8 @@ void CNAME (BLASLONG N, BLASLONG K, float alpha, float * __restrict A, \
261273
/* Pre-process the left matrix to make it suitable for
262274
matrix sum of outer-product calculation
263275
*/
264-
sgemm_direct_sme1_preprocess(N, K, A, A_mod);
265-
sgemm_direct_sme1_preprocess(N, K, B, B_mod);
276+
SGEMM_PREPROCESS(N, K, A, A_mod);
277+
SGEMM_PREPROCESS(N, K, B, B_mod);
266278
asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
267279
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15",
268280
"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7",

0 commit comments

Comments
 (0)