Skip to content

Commit 8e50b8d

Browse files
authored
Add d8–d15 to the inline-asm clobber lists, as the code does not explicitly save them
1 parent 7f89c6f commit 8e50b8d

1 file changed

Lines changed: 4 additions & 6 deletions

File tree

kernel/arm64/sgemm_direct_alpha_beta_arm64_sme1.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ return;
111111
}
112112

113113
__arm_new("za") __arm_locally_streaming
114-
__attribute__((visibility("hidden")))
115114
static void sgemm_direct_alpha_beta_sme1_2VLx2VL(uint64_t m, uint64_t k, uint64_t n, const float* alpha,\
116115
const float *ba, const float *restrict bb, const float* beta,\
117116
float *restrict C) {
@@ -177,11 +176,11 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict
177176
* of reading directly from vector (z) registers.
178177
* */
179178
asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
180-
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15",
179+
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
181180
"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7",
182181
"z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15",
183182
"z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23",
184-
"z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31");
183+
"z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31","za");
185184

186185
/* Pre-process the left matrix to make it suitable for
187186
matrix sum of outer-product calculation
@@ -190,11 +189,11 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict
190189
SME1_PREPROCESS(M, K, A, A_mod);
191190

192191
asm volatile("" : : :"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
193-
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15",
192+
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15","d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
194193
"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7",
195194
"z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15",
196195
"z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23",
197-
"z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31");
196+
"z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", "za");
198197

199198
/* Calculate C = alpha*A*B + beta*C */
200199

@@ -210,4 +209,3 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float alpha, float * __restrict
210209
float beta, float * __restrict R, BLASLONG strideR){fprintf(stderr,"empty sgemm_direct_alpha_beta should not be called!!!\n");}
211210

212211
#endif
213-

0 commit comments

Comments
 (0)