Skip to content

Commit b185c9a

Browse files
authored
Small fixes for separating the SME and dummy parts
1 parent 4af1870 commit b185c9a

3 files changed

Lines changed: 18 additions & 9 deletions

File tree

kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,6 @@
88
#include <inttypes.h>
99
#include <math.h>
1010
#include "sme_abi.h"
11-
#if defined(HAVE_SME)
12-
13-
#if defined(__ARM_FEATURE_SME) && defined(__clang__) && __clang_major__ >= 16
14-
#include <arm_sme.h>
15-
#endif
1611

1712
#if defined(DYNAMIC_ARCH)
1813
#define COMBINE(a,b) a ## b
@@ -25,10 +20,17 @@
2520
#define SME1_PREPROCESS sgemm_direct_sme1_preprocess
2621
#define SME1_KERNEL2X2 sgemm_direct_alpha_beta_sme1_2VLx2VL
2722
#endif
23+
2824
/* Function prototypes */
2925
extern void SME1_PREPROCESS(uint64_t nbr, uint64_t nbc,\
3026
const float * restrict a, float * a_mod);
3127

28+
#if defined(HAVE_SME)
29+
30+
#if defined(__ARM_FEATURE_SME) && defined(__clang__) && __clang_major__ >= 16
31+
#include <arm_sme.h>
32+
#endif
33+
3234
/* Function Definitions */
3335
static uint64_t sve_cntw() {
3436
uint64_t cnt;
@@ -210,5 +212,4 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict
210212
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict A,\
211213
BLASLONG strideA, float * __restrict B, BLASLONG strideB ,\
212214
float beta, float * __restrict R, BLASLONG strideR){fprintf(stderr,"empty sgemm_direct_alpha_beta should not be called!!!\n");}
213-
214215
#endif

kernel/arm64/sgemm_direct_arm64_sme1.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#include <stdlib.h>
88
#include <inttypes.h>
99
#include <math.h>
10-
#if defined(HAVE_SME)
1110
#if defined(DYNAMIC_ARCH)
1211
#define COMBINE(a,b) a ## b
1312
#define COMBINE2(a,b) COMBINE(a,b)
@@ -19,6 +18,7 @@
1918
#define SME1_PREPROCESS sgemm_direct_sme1_preprocess
2019
#define SME1_DIRECT2X2 sgemm_direct_sme1_2VLx2VL
2120
#endif
21+
#if defined(HAVE_SME)
2222
/* Function prototypes */
2323
extern void SME1_PREPROCESS(uint64_t nbr, uint64_t nbc,\
2424
const float * restrict a, float * a_mod) ;
@@ -87,6 +87,14 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\
8787
BLASLONG strideA, float * __restrict B, BLASLONG strideB ,\
8888
float * __restrict R, BLASLONG strideR){
8989
fprintf(stderr,"EMPTY sgemm_kernel_direct should never be called \n");
90-
}
90+
}
91+
void SME1_DIRECT2X2( uint64_t M , uint64_t K, uint64_t N,\
92+
const float * restrict A_base,\
93+
const float * restrict B_base,\
94+
const float * restrict C_base){};
95+
void SME1_PREPROCESS(uint64_t nbr, uint64_t nbc,\
96+
const float * restrict a, float * a_mod){};
97+
98+
9199
#endif
92100

kernel/arm64/sgemm_direct_performant.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K)
66
if (M<3) return 0;
77
unsigned long long mnk = M * N * K;
88
/* On M4, benchmarked performance peaks at a size of around 512, and its curve crosses that of the NEON SGEMM at a size of about 3100 */
9-
if (mnk >= 3100 * 3100 * 3100)
9+
if (mnk >= 3100L * 3100L * 3100L)
1010
return 0;
1111

1212
return 1;

0 commit comments

Comments
 (0)