Skip to content

Commit a683287

Browse files
authored
rework for dynamic_arch
1 parent b185c9a commit a683287

1 file changed

Lines changed: 20 additions & 5 deletions

File tree

kernel/arm64/ssyrk_direct_alpha_beta_arm64_sme1.c

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,26 @@
99
#include <math.h>
1010
#if defined(HAVE_SME)
1111

12+
#if defined(DYNAMIC_ARCH)
13+
#define COMBINE(a,b) a ## b
14+
#define COMBINE2(a,b) COMBINE(a,b)
15+
#define SGEMM_PREPROCESS_BASE sgemm_direct_sme1_preprocess
16+
#define SGEMM_PREPROCESS COMBINE2(SGEMM_PREPROCESS_BASE,TS)
17+
#define SGEMM_DIRECT2X2_BASE sgemm_direct_alpha_beta_sme1_2VLx2VL
18+
#define SGEMM_DIRECT2X2 COMBINE2(SGEMM_DIRECT2X2_BASE,TS)
19+
#else
20+
#define SGEMM_PREPROCESS sgemm_direct_sme1_preprocess
21+
#define SGEMM_DIRECT2X2 sgemm_direct_alpha_beta_sme1_2VLx2VL
22+
#endif
23+
1224
#if defined(__ARM_FEATURE_SME) && defined(__clang__) && __clang_major__ >= 16
1325
#include <arm_sme.h>
1426
#endif
1527

1628
/* Function prototypes */
17-
extern void sgemm_direct_sme1_preprocess(uint64_t nbr, uint64_t nbc,\
18-
const float * restrict a, float * a_mod) __asm__("sgemm_direct_sme1_preprocess");
29+
extern void SGEMM_PREPROCESS (uint64_t nbr, uint64_t nbc,\
30+
31+
const float * restrict a, float * a_mod) ;
1932

2033
/* Function Definitions */
2134
static uint64_t sve_cntw() {
@@ -227,7 +240,7 @@ void CNAME (BLASLONG N, BLASLONG K, float alpha, float * __restrict A,\
227240
/* Pre-process the left matrix to make it suitable for
228241
matrix sum of outer-product calculation
229242
*/
230-
sgemm_direct_sme1_preprocess(N, K, A, A_mod);
243+
SGEMM_PREPROCESS (N, K, A, A_mod);
231244
asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
232245
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15",
233246
"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7",
@@ -245,6 +258,8 @@ void CNAME (BLASLONG N, BLASLONG K, float alpha, float * __restrict A,\
245258
#else
246259

247260
void CNAME (BLASLONG N, BLASLONG K, float alpha, float * __restrict A,\
248-
BLASLONG strideA, float beta, float * __restrict C, BLASLONG strideC){}
249-
261+
BLASLONG strideA, float beta, float * __restrict C, BLASLONG strideC){
262+
fprintf(stderr,"empty ssyrk_direct kernel should never be called\n");
263+
}
264+
250265
#endif

0 commit comments

Comments
 (0)